gateforge-sdk 0.1.1__tar.gz → 0.2.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gateforge_sdk-0.2.2/PKG-INFO +666 -0
- gateforge_sdk-0.2.2/README.md +617 -0
- gateforge_sdk-0.2.2/gateforge/__init__.py +381 -0
- gateforge_sdk-0.2.2/gateforge/ab/__init__.py +3 -0
- gateforge_sdk-0.2.2/gateforge/ab/engine.py +96 -0
- gateforge_sdk-0.2.2/gateforge/client.py +643 -0
- gateforge_sdk-0.2.2/gateforge/config.py +79 -0
- gateforge_sdk-0.2.2/gateforge/context.py +61 -0
- gateforge_sdk-0.2.2/gateforge/features/__init__.py +61 -0
- gateforge_sdk-0.2.2/gateforge/guardrails/__init__.py +3 -0
- gateforge_sdk-0.2.2/gateforge/guardrails/engine.py +162 -0
- gateforge_sdk-0.2.2/gateforge/metrics.py +28 -0
- gateforge_sdk-0.2.2/gateforge/options.py +66 -0
- gateforge_sdk-0.2.2/gateforge/pricing.py +47 -0
- gateforge_sdk-0.2.2/gateforge/prompt.py +46 -0
- {gateforge_sdk-0.1.1 → gateforge_sdk-0.2.2}/gateforge/providers/anthropic.py +44 -59
- {gateforge_sdk-0.1.1 → gateforge_sdk-0.2.2}/gateforge/providers/gemini.py +56 -70
- {gateforge_sdk-0.1.1 → gateforge_sdk-0.2.2}/gateforge/providers/openai.py +35 -50
- {gateforge_sdk-0.1.1 → gateforge_sdk-0.2.2}/gateforge/response.py +21 -14
- gateforge_sdk-0.2.2/gateforge/tracing.py +337 -0
- gateforge_sdk-0.2.2/gateforge/wrappers/__init__.py +11 -0
- gateforge_sdk-0.2.2/gateforge/wrappers/anthropic.py +281 -0
- gateforge_sdk-0.2.2/gateforge/wrappers/gemini.py +239 -0
- gateforge_sdk-0.2.2/gateforge/wrappers/openai.py +285 -0
- {gateforge_sdk-0.1.1 → gateforge_sdk-0.2.2}/pyproject.toml +51 -47
- gateforge_sdk-0.1.1/PKG-INFO +0 -365
- gateforge_sdk-0.1.1/README.md +0 -323
- gateforge_sdk-0.1.1/gateforge/__init__.py +0 -60
- gateforge_sdk-0.1.1/gateforge/client.py +0 -119
- gateforge_sdk-0.1.1/gateforge/config.py +0 -38
- gateforge_sdk-0.1.1/gateforge/metrics.py +0 -25
- {gateforge_sdk-0.1.1 → gateforge_sdk-0.2.2}/.env.example +0 -0
- {gateforge_sdk-0.1.1 → gateforge_sdk-0.2.2}/.gitignore +0 -0
- {gateforge_sdk-0.1.1 → gateforge_sdk-0.2.2}/.pypirc.example +0 -0
- {gateforge_sdk-0.1.1 → gateforge_sdk-0.2.2}/INSTALL.md +0 -0
- {gateforge_sdk-0.1.1 → gateforge_sdk-0.2.2}/LICENSE +0 -0
- {gateforge_sdk-0.1.1 → gateforge_sdk-0.2.2}/MANIFEST.in +0 -0
- {gateforge_sdk-0.1.1 → gateforge_sdk-0.2.2}/PUBLISHING.md +0 -0
- {gateforge_sdk-0.1.1 → gateforge_sdk-0.2.2}/PUBLISH_NOW.md +0 -0
- {gateforge_sdk-0.1.1 → gateforge_sdk-0.2.2}/gateforge/pii.py +0 -0
- {gateforge_sdk-0.1.1 → gateforge_sdk-0.2.2}/gateforge/providers/__init__.py +0 -0
- {gateforge_sdk-0.1.1 → gateforge_sdk-0.2.2}/tests/__init__.py +0 -0
- {gateforge_sdk-0.1.1 → gateforge_sdk-0.2.2}/tests/test_metrics.py +0 -0
- {gateforge_sdk-0.1.1 → gateforge_sdk-0.2.2}/tests/test_pii.py +0 -0
- {gateforge_sdk-0.1.1 → gateforge_sdk-0.2.2}/tests/test_providers.py +0 -0
|
@@ -0,0 +1,666 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: gateforge-sdk
|
|
3
|
+
Version: 0.2.2
|
|
4
|
+
Summary: Privacy-first LLMOps SDK — Automatic PII masking, cost tracking, and prompt management for LLM applications
|
|
5
|
+
Project-URL: Homepage, https://gateforge.dev
|
|
6
|
+
Project-URL: Documentation, https://gateforge.dev/docs
|
|
7
|
+
Project-URL: Repository, https://github.com/gateforge/gateforge-sdk
|
|
8
|
+
Project-URL: Dashboard, https://app.gateforge.dev
|
|
9
|
+
Project-URL: Bug Tracker, https://github.com/gateforge/gateforge-sdk/issues
|
|
10
|
+
Author-email: Gateforge Team <support@gateforge.dev>
|
|
11
|
+
License: MIT
|
|
12
|
+
License-File: LICENSE
|
|
13
|
+
Keywords: anthropic,gemini,llm,llmops,mlops,openai,pii,privacy
|
|
14
|
+
Classifier: Development Status :: 4 - Beta
|
|
15
|
+
Classifier: Intended Audience :: Developers
|
|
16
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
17
|
+
Classifier: Programming Language :: Python :: 3
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
21
|
+
Classifier: Topic :: Security
|
|
22
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
23
|
+
Requires-Python: >=3.10
|
|
24
|
+
Requires-Dist: httpx>=0.27.0
|
|
25
|
+
Requires-Dist: pii-firewall[langdetect,presidio,transformers]
|
|
26
|
+
Provides-Extra: all
|
|
27
|
+
Requires-Dist: anthropic>=0.40.0; extra == 'all'
|
|
28
|
+
Requires-Dist: google-genai>=1.0.0; extra == 'all'
|
|
29
|
+
Requires-Dist: openai>=1.0.0; extra == 'all'
|
|
30
|
+
Requires-Dist: opentelemetry-api>=1.20.0; extra == 'all'
|
|
31
|
+
Requires-Dist: opentelemetry-exporter-otlp-proto-grpc>=1.20.0; extra == 'all'
|
|
32
|
+
Requires-Dist: opentelemetry-sdk>=1.20.0; extra == 'all'
|
|
33
|
+
Provides-Extra: anthropic
|
|
34
|
+
Requires-Dist: anthropic>=0.40.0; extra == 'anthropic'
|
|
35
|
+
Provides-Extra: dev
|
|
36
|
+
Requires-Dist: build>=1.0.0; extra == 'dev'
|
|
37
|
+
Requires-Dist: pytest-asyncio>=0.23.0; extra == 'dev'
|
|
38
|
+
Requires-Dist: pytest>=8.0.0; extra == 'dev'
|
|
39
|
+
Requires-Dist: twine>=5.0.0; extra == 'dev'
|
|
40
|
+
Provides-Extra: gemini
|
|
41
|
+
Requires-Dist: google-genai>=1.0.0; extra == 'gemini'
|
|
42
|
+
Provides-Extra: openai
|
|
43
|
+
Requires-Dist: openai>=1.0.0; extra == 'openai'
|
|
44
|
+
Provides-Extra: otel
|
|
45
|
+
Requires-Dist: opentelemetry-api>=1.20.0; extra == 'otel'
|
|
46
|
+
Requires-Dist: opentelemetry-exporter-otlp-proto-grpc>=1.20.0; extra == 'otel'
|
|
47
|
+
Requires-Dist: opentelemetry-sdk>=1.20.0; extra == 'otel'
|
|
48
|
+
Description-Content-Type: text/markdown
|
|
49
|
+
|
|
50
|
+
# Gateforge SDK (Python)
|
|
51
|
+
|
|
52
|
+
**Privacy-first LLMOps SDK** — transparent client wrapping with automatic PII masking, cost tracking, A/B prompt testing, guardrails, and agent trace support.
|
|
53
|
+
|
|
54
|
+
[Python 3.10+](https://www.python.org/downloads/)
|
|
55
|
+
[License: MIT](LICENSE)
|
|
56
|
+
|
|
57
|
+
## What it does
|
|
58
|
+
|
|
59
|
+
Gateforge wraps your existing provider client (OpenAI, Anthropic, Gemini) with a transparent proxy. Your code is unchanged — the SDK intercepts each call to run the full pipeline locally before anything reaches the provider:
|
|
60
|
+
|
|
61
|
+
```
|
|
62
|
+
Your code
|
|
63
|
+
│
|
|
64
|
+
▼
|
|
65
|
+
pre-call → A/B variant selection → system prompt injection
|
|
66
|
+
→ PII anonymize (local)
|
|
67
|
+
→ input guardrail check (local rules)
|
|
68
|
+
│
|
|
69
|
+
▼
|
|
70
|
+
LLM provider (sees masked content only)
|
|
71
|
+
│
|
|
72
|
+
▼
|
|
73
|
+
post-call → PII rehydrate (local)
|
|
74
|
+
→ output guardrail check
|
|
75
|
+
→ cost + latency compute
|
|
76
|
+
→ span emission (fire-and-forget, metadata only)
|
|
77
|
+
│
|
|
78
|
+
▼
|
|
79
|
+
Your code receives: original PII restored, guardrails applied
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
Unmasked content never leaves your environment: the LLM provider sees only masked content, and only metadata (tokens, cost, latency, PII types) is sent to Gateforge.
|
|
83
|
+
|
|
84
|
+
---
|
|
85
|
+
|
|
86
|
+
## Installation
|
|
87
|
+
|
|
88
|
+
```bash
|
|
89
|
+
pip install gateforge-sdk
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
With provider extras:
|
|
93
|
+
```bash
|
|
94
|
+
pip install gateforge-sdk[openai] # OpenAI only
|
|
95
|
+
pip install gateforge-sdk[anthropic] # Anthropic only
|
|
96
|
+
pip install gateforge-sdk[gemini] # Google Gemini only
|
|
97
|
+
pip install gateforge-sdk[all] # All providers plus OpenTelemetry
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
Get your API key at [https://app.gateforge.dev/dashboard/keys](https://app.gateforge.dev/dashboard/keys).
|
|
101
|
+
|
|
102
|
+
---
|
|
103
|
+
|
|
104
|
+
## Quick Start
|
|
105
|
+
|
|
106
|
+
### 1. Initialize once at startup
|
|
107
|
+
|
|
108
|
+
```python
|
|
109
|
+
import gateforge
|
|
110
|
+
|
|
111
|
+
gateforge.init(
|
|
112
|
+
api_key="gf-live-YOUR_GATEFORGE_KEY",
|
|
113
|
+
# Optional: provider keys if you use gateforge.chat() high-level API
|
|
114
|
+
# openai_key="sk-...",
|
|
115
|
+
)
|
|
116
|
+
# Downloads config (PII patterns, guardrail rules, A/B experiments)
|
|
117
|
+
# and starts a background thread that refreshes it every 5 minutes
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
### 2. Wrap your provider client
|
|
121
|
+
|
|
122
|
+
```python
|
|
123
|
+
import gateforge
|
|
124
|
+
from openai import OpenAI
|
|
125
|
+
|
|
126
|
+
gateforge.init(api_key="gf-live-...")
|
|
127
|
+
|
|
128
|
+
client = gateforge.wrap_openai(OpenAI(api_key="sk-..."))
|
|
129
|
+
|
|
130
|
+
# Use exactly as before — the pipeline runs automatically
|
|
131
|
+
response = client.chat.completions.create(
|
|
132
|
+
model="gpt-4o-mini",
|
|
133
|
+
messages=[{"role": "user", "content": "My SSN is 123-45-6789. Explain diabetes."}],
|
|
134
|
+
)
|
|
135
|
+
print(response.choices[0].message.content)
|
|
136
|
+
# → Provider saw "[SSN_001]". Response rehydrated with original value.
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
Or use **auto-detection** to wrap any supported client:
|
|
140
|
+
|
|
141
|
+
```python
|
|
142
|
+
client = gateforge.wrap(OpenAI(api_key="sk-...")) # detects OpenAI
|
|
143
|
+
client = gateforge.wrap(Anthropic(api_key="sk-ant-...")) # detects Anthropic
|
|
144
|
+
client = gateforge.wrap(genai.Client(api_key="AIza-...")) # detects Gemini
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
---
|
|
148
|
+
|
|
149
|
+
## Provider Wrappers
|
|
150
|
+
|
|
151
|
+
All wrappers are transparent — input params and return types are identical to the underlying provider SDK.
|
|
152
|
+
|
|
153
|
+
### OpenAI
|
|
154
|
+
|
|
155
|
+
```python
|
|
156
|
+
import gateforge
|
|
157
|
+
from openai import OpenAI, AsyncOpenAI
|
|
158
|
+
|
|
159
|
+
gateforge.init(api_key="gf-live-...")
|
|
160
|
+
|
|
161
|
+
# Sync
|
|
162
|
+
client = gateforge.wrap_openai(OpenAI(api_key="sk-..."))
|
|
163
|
+
response = client.chat.completions.create(
|
|
164
|
+
model="gpt-4o-mini",
|
|
165
|
+
messages=[{"role": "user", "content": "Hello!"}],
|
|
166
|
+
)
|
|
167
|
+
print(response.choices[0].message.content)
|
|
168
|
+
|
|
169
|
+
# Async
|
|
170
|
+
async_client = gateforge.wrap_openai(AsyncOpenAI(api_key="sk-..."))
|
|
171
|
+
response = await async_client.chat.completions.create(
|
|
172
|
+
model="gpt-4o-mini",
|
|
173
|
+
messages=[{"role": "user", "content": "Hello!"}],
|
|
174
|
+
)
|
|
175
|
+
|
|
176
|
+
# Streaming — metrics emitted after the stream ends
|
|
177
|
+
for chunk in client.chat.completions.create(
|
|
178
|
+
model="gpt-4o-mini",
|
|
179
|
+
messages=[{"role": "user", "content": "Tell me a story"}],
|
|
180
|
+
stream=True,
|
|
181
|
+
):
|
|
182
|
+
if chunk.choices[0].delta.content:
|
|
183
|
+
print(chunk.choices[0].delta.content, end="", flush=True)
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
### Anthropic
|
|
187
|
+
|
|
188
|
+
```python
|
|
189
|
+
import gateforge
|
|
190
|
+
from anthropic import Anthropic, AsyncAnthropic
|
|
191
|
+
|
|
192
|
+
gateforge.init(api_key="gf-live-...")
|
|
193
|
+
|
|
194
|
+
client = gateforge.wrap_anthropic(Anthropic(api_key="sk-ant-..."))
|
|
195
|
+
response = client.messages.create(
|
|
196
|
+
model="claude-haiku-4-5",
|
|
197
|
+
max_tokens=1024,
|
|
198
|
+
messages=[{"role": "user", "content": "My phone is +1-555-1234. Help me."}],
|
|
199
|
+
)
|
|
200
|
+
print(response.content[0].text)
|
|
201
|
+
|
|
202
|
+
# Async
|
|
203
|
+
async_client = gateforge.wrap_anthropic(AsyncAnthropic(api_key="sk-ant-..."))
|
|
204
|
+
response = await async_client.messages.create(
|
|
205
|
+
model="claude-haiku-4-5",
|
|
206
|
+
max_tokens=1024,
|
|
207
|
+
messages=[{"role": "user", "content": "Hello!"}],
|
|
208
|
+
)
|
|
209
|
+
```
|
|
210
|
+
|
|
211
|
+
### Gemini
|
|
212
|
+
|
|
213
|
+
```python
|
|
214
|
+
import gateforge
|
|
215
|
+
from google import genai
|
|
216
|
+
|
|
217
|
+
gateforge.init(api_key="gf-live-...")
|
|
218
|
+
|
|
219
|
+
client = gateforge.wrap_gemini(genai.Client(api_key="AIza-..."))
|
|
220
|
+
response = client.models.generate_content(
|
|
221
|
+
model="gemini-2.5-flash",
|
|
222
|
+
contents=[{"role": "user", "parts": [{"text": "Hello!"}]}],
|
|
223
|
+
)
|
|
224
|
+
print(response.text)
|
|
225
|
+
```
|
|
226
|
+
|
|
227
|
+
---
|
|
228
|
+
|
|
229
|
+
## Agent Trace Support
|
|
230
|
+
|
|
231
|
+
For multi-step agent loops, wrap the entire run in `gateforge.trace()`. The SDK automatically assigns step numbers and groups all spans — LLM calls and tool calls — under the same conversation ID.
|
|
232
|
+
|
|
233
|
+
### Basic multi-step trace
|
|
234
|
+
|
|
235
|
+
```python
|
|
236
|
+
import gateforge
|
|
237
|
+
from openai import OpenAI
|
|
238
|
+
|
|
239
|
+
gateforge.init(api_key="gf-live-...")
|
|
240
|
+
client = gateforge.wrap_openai(OpenAI(api_key="sk-..."))
|
|
241
|
+
|
|
242
|
+
with gateforge.trace(conversation_id="conv_abc123"):
|
|
243
|
+
# Step 1 — each LLM call inside the block gets an auto-incremented step
|
|
244
|
+
r1 = client.chat.completions.create(
|
|
245
|
+
model="gpt-4o-mini",
|
|
246
|
+
messages=[{"role": "user", "content": "What tools do I need to book a flight?"}],
|
|
247
|
+
)
|
|
248
|
+
|
|
249
|
+
# Step 2
|
|
250
|
+
r2 = client.chat.completions.create(
|
|
251
|
+
model="gpt-4o-mini",
|
|
252
|
+
messages=[
|
|
253
|
+
{"role": "user", "content": "What tools do I need to book a flight?"},
|
|
254
|
+
{"role": "assistant", "content": r1.choices[0].message.content},
|
|
255
|
+
{"role": "user", "content": "Search for flights to Paris on June 10."},
|
|
256
|
+
],
|
|
257
|
+
)
|
|
258
|
+
# → Dashboard shows: conv_abc123 | step 1 (LLM, gpt-4o-mini) → step 2 (LLM, gpt-4o-mini)
|
|
259
|
+
```
|
|
260
|
+
|
|
261
|
+
### Agent with tool calls
|
|
262
|
+
|
|
263
|
+
```python
|
|
264
|
+
import time
|
|
265
|
+
import gateforge
|
|
266
|
+
from openai import OpenAI
|
|
267
|
+
|
|
268
|
+
gateforge.init(api_key="gf-live-...")
|
|
269
|
+
client = gateforge.wrap_openai(OpenAI(api_key="sk-..."))
|
|
270
|
+
|
|
271
|
+
def search_flights(destination: str, date: str) -> list[dict]:
|
|
272
|
+
...
|
|
273
|
+
|
|
274
|
+
def book_flight(flight_id: str, passenger: str) -> str:
|
|
275
|
+
...
|
|
276
|
+
|
|
277
|
+
with gateforge.trace() as t:
|
|
278
|
+
print(f"Trace ID: {t.conversation_id}") # auto-generated UUID
|
|
279
|
+
|
|
280
|
+
# Step 1 — router decides what to do
|
|
281
|
+
plan = client.chat.completions.create(
|
|
282
|
+
model="gpt-4o-mini",
|
|
283
|
+
messages=[{"role": "user", "content": "Book me a flight to Paris for June 10"}],
|
|
284
|
+
)
|
|
285
|
+
|
|
286
|
+
# Step 2 — tool call (manually recorded so it appears in the waterfall)
|
|
287
|
+
t0 = time.perf_counter()
|
|
288
|
+
flights = search_flights("Paris", "2026-06-10")
|
|
289
|
+
gateforge.record_tool_call(
|
|
290
|
+
"search_flights",
|
|
291
|
+
latency_ms=(time.perf_counter() - t0) * 1000,
|
|
292
|
+
metadata={"destination": "Paris", "date": "2026-06-10", "results": len(flights)},
|
|
293
|
+
)
|
|
294
|
+
|
|
295
|
+
# Step 3 — synthesize results
|
|
296
|
+
selection = client.chat.completions.create(
|
|
297
|
+
model="gpt-4o-mini",
|
|
298
|
+
messages=[
|
|
299
|
+
{"role": "user", "content": "Book me a flight to Paris for June 10"},
|
|
300
|
+
{"role": "assistant", "content": plan.choices[0].message.content},
|
|
301
|
+
{"role": "user", "content": f"Available flights: {flights}. Pick the best one."},
|
|
302
|
+
],
|
|
303
|
+
)
|
|
304
|
+
|
|
305
|
+
# Step 4 — confirm booking
|
|
306
|
+
t0 = time.perf_counter()
|
|
307
|
+
confirmation = book_flight(flights[0]["id"], "John Doe")
|
|
308
|
+
gateforge.record_tool_result(
|
|
309
|
+
"book_flight",
|
|
310
|
+
latency_ms=(time.perf_counter() - t0) * 1000,
|
|
311
|
+
metadata={"confirmation_code": confirmation},
|
|
312
|
+
)
|
|
313
|
+
|
|
314
|
+
# Dashboard waterfall:
|
|
315
|
+
# conv_xyz | user: "Book me a flight to Paris for June 10"
|
|
316
|
+
# ├─ Step 1 LLM gpt-4o-mini 340 tok $0.002 820ms
|
|
317
|
+
# ├─ Step 2 tool search_flights 210ms
|
|
318
|
+
# ├─ Step 3 LLM gpt-4o-mini 980 tok $0.008 1400ms
|
|
319
|
+
# └─ Step 4 tool book_flight 95ms
|
|
320
|
+
# Total: 1320 tokens | $0.010 | 2.5s
|
|
321
|
+
```
|
|
322
|
+
|
|
323
|
+
### Multi-agent (multiple wrapped clients in one trace)
|
|
324
|
+
|
|
325
|
+
```python
|
|
326
|
+
import gateforge
|
|
327
|
+
from openai import OpenAI
|
|
328
|
+
from anthropic import Anthropic
|
|
329
|
+
|
|
330
|
+
gateforge.init(api_key="gf-live-...")
|
|
331
|
+
openai_client = gateforge.wrap_openai(OpenAI(api_key="sk-..."))
|
|
332
|
+
claude_client = gateforge.wrap_anthropic(Anthropic(api_key="sk-ant-..."))
|
|
333
|
+
|
|
334
|
+
with gateforge.trace(conversation_id="conv_multiagent"):
|
|
335
|
+
# Step 1 — GPT as router
|
|
336
|
+
routing = openai_client.chat.completions.create(
|
|
337
|
+
model="gpt-4o-mini",
|
|
338
|
+
messages=[{"role": "user", "content": "Summarize and translate this document."}],
|
|
339
|
+
)
|
|
340
|
+
|
|
341
|
+
# Step 2 — Claude as specialist
|
|
342
|
+
summary = claude_client.messages.create(
|
|
343
|
+
model="claude-haiku-4-5",
|
|
344
|
+
max_tokens=512,
|
|
345
|
+
messages=[{"role": "user", "content": "Summarize: " + document_text}],
|
|
346
|
+
)
|
|
347
|
+
|
|
348
|
+
# Step 3 — GPT for final output
|
|
349
|
+
result = openai_client.chat.completions.create(
|
|
350
|
+
model="gpt-4o-mini",
|
|
351
|
+
messages=[
|
|
352
|
+
{"role": "user", "content": f"Translate to Spanish: {summary.content[0].text}"}
|
|
353
|
+
],
|
|
354
|
+
)
|
|
355
|
+
# All three steps appear in the same trace waterfall, regardless of provider
|
|
356
|
+
```
|
|
357
|
+
|
|
358
|
+
### Async agent
|
|
359
|
+
|
|
360
|
+
```python
|
|
361
|
+
import asyncio
|
|
362
|
+
import gateforge
|
|
363
|
+
from openai import AsyncOpenAI
|
|
364
|
+
|
|
365
|
+
gateforge.init(api_key="gf-live-...")
|
|
366
|
+
|
|
367
|
+
async def run_agent(user_message: str, conversation_id: str):
|
|
368
|
+
client = gateforge.wrap_openai(AsyncOpenAI(api_key="sk-..."))
|
|
369
|
+
|
|
370
|
+
with gateforge.trace(conversation_id=conversation_id):
|
|
371
|
+
r1 = await client.chat.completions.create(
|
|
372
|
+
model="gpt-4o-mini",
|
|
373
|
+
messages=[{"role": "user", "content": user_message}],
|
|
374
|
+
)
|
|
375
|
+
r2 = await client.chat.completions.create(
|
|
376
|
+
model="gpt-4o-mini",
|
|
377
|
+
messages=[
|
|
378
|
+
{"role": "user", "content": user_message},
|
|
379
|
+
{"role": "assistant", "content": r1.choices[0].message.content},
|
|
380
|
+
{"role": "user", "content": "Now summarize in one sentence."},
|
|
381
|
+
],
|
|
382
|
+
)
|
|
383
|
+
return r2.choices[0].message.content
|
|
384
|
+
|
|
385
|
+
asyncio.run(run_agent("Explain quantum entanglement", "conv_001"))
|
|
386
|
+
```
|
|
387
|
+
|
|
388
|
+
### Per-call conversation ID (without context manager)
|
|
389
|
+
|
|
390
|
+
When you can't use a context manager (e.g., in a framework that manages request scope), pass `CallOptions` directly:
|
|
391
|
+
|
|
392
|
+
```python
|
|
393
|
+
from gateforge import CallOptions
|
|
394
|
+
|
|
395
|
+
response = client.chat.completions.create(
|
|
396
|
+
model="gpt-4o-mini",
|
|
397
|
+
messages=[{"role": "user", "content": "Hello!"}],
|
|
398
|
+
gateforge_options=CallOptions(conversation_id="conv_xyz", trace=True),
|
|
399
|
+
)
|
|
400
|
+
```
|
|
401
|
+
|
|
402
|
+
---
|
|
403
|
+
|
|
404
|
+
## A/B Prompt Testing
|
|
405
|
+
|
|
406
|
+
Create experiments in the dashboard, then the SDK assigns variants deterministically at call time — no network round-trip, zero added latency.
|
|
407
|
+
|
|
408
|
+
```python
|
|
409
|
+
from gateforge import CallOptions
|
|
410
|
+
|
|
411
|
+
# Pin to a specific experiment; assign variant based on user session
|
|
412
|
+
response = client.chat.completions.create(
|
|
413
|
+
model="gpt-4o-mini",
|
|
414
|
+
messages=[{"role": "user", "content": "Help me write an email"}],
|
|
415
|
+
gateforge_options=CallOptions(
|
|
416
|
+
experiment_id="exp_email_v2",
|
|
417
|
+
session_id="user_123", # same user always gets the same variant
|
|
418
|
+
),
|
|
419
|
+
)
|
|
420
|
+
# Variant A or B system prompt injected automatically.
|
|
421
|
+
# experiment_id and variant are included in telemetry for dashboard comparison.
|
|
422
|
+
```
|
|
423
|
+
|
|
424
|
+
Variant assignment is a deterministic hash of `(experiment_id, session_id)` — no network call at call time.
|
|
425
|
+
|
|
426
|
+
Inside a trace, A/B telemetry is automatically attached to the correct step:
|
|
427
|
+
|
|
428
|
+
```python
|
|
429
|
+
with gateforge.trace(conversation_id="conv_xyz"):
|
|
430
|
+
response = client.chat.completions.create(
|
|
431
|
+
model="gpt-4o-mini",
|
|
432
|
+
messages=[{"role": "user", "content": "Draft a subject line"}],
|
|
433
|
+
gateforge_options=CallOptions(experiment_id="exp_subject_lines", session_id="user_123"),
|
|
434
|
+
)
|
|
435
|
+
```
|
|
436
|
+
|
|
437
|
+
---
|
|
438
|
+
|
|
439
|
+
## Guardrails
|
|
440
|
+
|
|
441
|
+
Define guardrail rules in the dashboard. The SDK downloads the rules and evaluates each call against them locally — no added network latency, works offline. Violations are reported to the dashboard asynchronously.
|
|
442
|
+
|
|
443
|
+
```python
|
|
444
|
+
from gateforge import CallOptions, GuardrailBlocked
|
|
445
|
+
|
|
446
|
+
# Guardrails run automatically when guardrails_enabled is set in your config.
|
|
447
|
+
# You can override per-call:
|
|
448
|
+
try:
|
|
449
|
+
response = client.chat.completions.create(
|
|
450
|
+
model="gpt-4o-mini",
|
|
451
|
+
messages=[{"role": "user", "content": "Tell me how to pick a lock"}],
|
|
452
|
+
gateforge_options=CallOptions(guardrails=True),
|
|
453
|
+
)
|
|
454
|
+
except GuardrailBlocked as e:
|
|
455
|
+
print(f"Blocked by rule: {e.rule_id}")
|
|
456
|
+
response_text = "I can't help with that."
|
|
457
|
+
```
|
|
458
|
+
|
|
459
|
+
Configure `on_fail` behavior per rule in the dashboard:
|
|
460
|
+
|
|
461
|
+
| `on_fail` | Behavior |
|
|
462
|
+
|---|---|
|
|
463
|
+
| `block` | Raises `GuardrailBlocked` |
|
|
464
|
+
| `warn` | Returns response with `.warnings` attached |
|
|
465
|
+
| `retry` | Re-calls LLM up to N times with a modified prompt |
|
|
466
|
+
| `fallback` | Returns a static fallback response |
|
|
467
|
+
|
|
468
|
+
Input guardrails run before the LLM call. Output guardrails run after.
|
|
469
|
+
|
|
470
|
+
---
|
|
471
|
+
|
|
472
|
+
## PII Protection
|
|
473
|
+
|
|
474
|
+
### What gets detected
|
|
475
|
+
|
|
476
|
+
- **Personal**: names, emails, phone numbers, addresses
|
|
477
|
+
- **Financial**: credit cards, bank accounts, SSN, tax IDs
|
|
478
|
+
- **Healthcare**: medical record numbers, symptoms, diagnoses
|
|
479
|
+
- **Technical**: IP addresses, URLs, API keys
|
|
480
|
+
- **Custom**: your own regex patterns (configured in the dashboard)
|
|
481
|
+
|
|
482
|
+
### Domain-specific detection
|
|
483
|
+
|
|
484
|
+
```python
|
|
485
|
+
from gateforge import CallOptions
|
|
486
|
+
|
|
487
|
+
# Override PII domain per call
|
|
488
|
+
response = client.chat.completions.create(
|
|
489
|
+
model="gpt-4o-mini",
|
|
490
|
+
messages=[{
|
|
491
|
+
"role": "user",
|
|
492
|
+
"content": "Patient John Doe, SSN 123-45-6789, has hypertension."
|
|
493
|
+
}],
|
|
494
|
+
gateforge_options=CallOptions(pii_domain="healthcare"),
|
|
495
|
+
)
|
|
496
|
+
# Detected: PERSON, SSN, MEDICAL_CONDITION
|
|
497
|
+
```
|
|
498
|
+
|
|
499
|
+
### Direct anonymization
|
|
500
|
+
|
|
501
|
+
```python
|
|
502
|
+
result = gateforge.anonymize("My email is john@example.com")
|
|
503
|
+
print(result["sanitized"]) # "My email is [EMAIL_001]"
|
|
504
|
+
print(result["entities"]) # ["EMAIL"]
|
|
505
|
+
|
|
506
|
+
original = gateforge.rehydrate("[EMAIL_001] confirmed", context=result["context"])
|
|
507
|
+
print(original) # "john@example.com confirmed"
|
|
508
|
+
```
|
|
509
|
+
|
|
510
|
+
---
|
|
511
|
+
|
|
512
|
+
## CallOptions Reference
|
|
513
|
+
|
|
514
|
+
Pass `gateforge_options=CallOptions(...)` to any wrapped `.create()` call to override defaults for that call only:
|
|
515
|
+
|
|
516
|
+
```python
|
|
517
|
+
from gateforge import CallOptions
|
|
518
|
+
|
|
519
|
+
response = client.chat.completions.create(
|
|
520
|
+
model="gpt-4o-mini",
|
|
521
|
+
messages=[...],
|
|
522
|
+
gateforge_options=CallOptions(
|
|
523
|
+
# Trace grouping
|
|
524
|
+
conversation_id="conv_abc", # attach this call to a conversation
|
|
525
|
+
trace=True, # enable trace events for this call
|
|
526
|
+
|
|
527
|
+
# A/B testing
|
|
528
|
+
experiment_id="exp_abc", # target a specific experiment
|
|
529
|
+
session_id="user_123", # deterministic variant assignment
|
|
530
|
+
|
|
531
|
+
# Feature overrides (None = use global config)
|
|
532
|
+
pii=True, # force PII on for this call
|
|
533
|
+
guardrails=True, # force guardrails on
|
|
534
|
+
track_cost=False, # skip cost tracking for this call
|
|
535
|
+
track_latency=True,
|
|
536
|
+
track_tokens=True,
|
|
537
|
+
ab=False, # skip A/B for this call
|
|
538
|
+
),
|
|
539
|
+
)
|
|
540
|
+
```
|
|
541
|
+
|
|
542
|
+
---
|
|
543
|
+
|
|
544
|
+
## Simple API (high-level)
|
|
545
|
+
|
|
546
|
+
Intended for quick prototyping. Returns a `GateforgeResponse` rather than the native provider response.
|
|
547
|
+
|
|
548
|
+
```python
|
|
549
|
+
import gateforge
|
|
550
|
+
|
|
551
|
+
gateforge.init(
|
|
552
|
+
api_key="gf-live-...",
|
|
553
|
+
openai_key="sk-...",
|
|
554
|
+
anthropic_key="sk-ant-...",
|
|
555
|
+
gemini_key="AIza-...",
|
|
556
|
+
)
|
|
557
|
+
|
|
558
|
+
response = gateforge.chat(
|
|
559
|
+
model="gpt-4o-mini",
|
|
560
|
+
messages=[{"role": "user", "content": "My email is john@example.com, help me."}],
|
|
561
|
+
)
|
|
562
|
+
|
|
563
|
+
print(response.content) # "Hello! I'd be happy to help..."
|
|
564
|
+
print(response.pii_detected) # ['EMAIL']
|
|
565
|
+
print(response.cost_usd) # 0.000023
|
|
566
|
+
print(response.latency_ms) # 412.0
|
|
567
|
+
print(response.prompt_tokens) # 18
|
|
568
|
+
print(response.completion_tokens) # 27
|
|
569
|
+
```
|
|
570
|
+
|
|
571
|
+
> For production use, prefer `gateforge.wrap_openai()` / `wrap_anthropic()` / `wrap_gemini()` — they return the native provider response object with all fields intact.
|
|
572
|
+
|
|
573
|
+
---
|
|
574
|
+
|
|
575
|
+
## OTel Integration
|
|
576
|
+
|
|
577
|
+
If your application already has an OpenTelemetry pipeline, Gateforge emits `gen_ai.*` spans into it automatically — no extra config needed. To set up OTel from scratch:
|
|
578
|
+
|
|
579
|
+
```bash
|
|
580
|
+
pip install gateforge-sdk[otel]
|
|
581
|
+
```
|
|
582
|
+
|
|
583
|
+
```python
|
|
584
|
+
import gateforge
|
|
585
|
+
from gateforge import configure_otel
|
|
586
|
+
|
|
587
|
+
configure_otel(
|
|
588
|
+
endpoint="http://localhost:4317", # your OTLP collector
|
|
589
|
+
service_name="my-llm-app",
|
|
590
|
+
)
|
|
591
|
+
|
|
592
|
+
gateforge.init(api_key="gf-live-...")
|
|
593
|
+
client = gateforge.wrap_openai(OpenAI(api_key="sk-..."))
|
|
594
|
+
|
|
595
|
+
# Every LLM call now emits a gen_ai span into your OTel pipeline
|
|
596
|
+
response = client.chat.completions.create(model="gpt-4o-mini", messages=[...])
|
|
597
|
+
```
|
|
598
|
+
|
|
599
|
+
---
|
|
600
|
+
|
|
601
|
+
## Supported Models
|
|
602
|
+
|
|
603
|
+
### OpenAI
|
|
604
|
+
- GPT-4o, GPT-4o-mini
|
|
605
|
+
- GPT-4.1, GPT-4.1-mini, GPT-4.1-nano
|
|
606
|
+
|
|
607
|
+
### Anthropic
|
|
608
|
+
- Claude Haiku 4-5
|
|
609
|
+
- Claude Sonnet 4-5
|
|
610
|
+
- Claude Opus 4
|
|
611
|
+
|
|
612
|
+
### Google Gemini
|
|
613
|
+
- Gemini 2.5 Flash
|
|
614
|
+
- Gemini 2.5 Pro
|
|
615
|
+
|
|
616
|
+
---
|
|
617
|
+
|
|
618
|
+
## Dashboard & Monitoring
|
|
619
|
+
|
|
620
|
+
[https://app.gateforge.dev/dashboard](https://app.gateforge.dev/dashboard)
|
|
621
|
+
|
|
622
|
+
- Request volume and trends
|
|
623
|
+
- Cost breakdown by model and provider
|
|
624
|
+
- Latency analytics
|
|
625
|
+
- PII detection statistics
|
|
626
|
+
- A/B experiment results (variant comparison, significance indicator)
|
|
627
|
+
- Guardrail violation alerts
|
|
628
|
+
- Agent waterfall traces (per-step cost, latency, tool calls)
|
|
629
|
+
- API key management
|
|
630
|
+
|
|
631
|
+
---
|
|
632
|
+
|
|
633
|
+
## Troubleshooting
|
|
634
|
+
|
|
635
|
+
**`ImportError: No module named 'gateforge'`**
|
|
636
|
+
```bash
|
|
637
|
+
pip install gateforge-sdk
|
|
638
|
+
```
|
|
639
|
+
|
|
640
|
+
**`RuntimeError: Call gateforge.init() first`**
|
|
641
|
+
```python
|
|
642
|
+
gateforge.init(api_key="gf-live-...")
|
|
643
|
+
# Call this once before any wrap_*() or chat() calls
|
|
644
|
+
```
|
|
645
|
+
|
|
646
|
+
**PII not detected**
|
|
647
|
+
1. Check that the domain setting matches your data (`healthcare`, `finance`, `legal`, `generic`)
|
|
648
|
+
2. The `presidio` backend is the most accurate; `regex` is the fastest
|
|
649
|
+
3. Add custom patterns in the dashboard for domain-specific entities
|
|
650
|
+
|
|
651
|
+
**Steps not appearing in trace waterfall**
|
|
652
|
+
1. Make sure `tracing_enabled=True` is set in your feature flags (set in the dashboard or override with `FeatureFlags`)
|
|
653
|
+
2. Confirm a `conversation_id` is active — either via `gateforge.trace()` or `CallOptions(conversation_id=...)`
|
|
654
|
+
|
|
655
|
+
---
|
|
656
|
+
|
|
657
|
+
## Links
|
|
658
|
+
|
|
659
|
+
- [Website](https://gateforge.dev)
|
|
660
|
+
- [Dashboard](https://app.gateforge.dev)
|
|
661
|
+
- [API Docs](https://api.gateforge.dev/docs)
|
|
662
|
+
- [Issues](https://github.com/gateforge/gateforge-sdk/issues)
|
|
663
|
+
|
|
664
|
+
## License
|
|
665
|
+
|
|
666
|
+
MIT — see [LICENSE](LICENSE). The SDK is open source; the Gateforge service is commercial with a free tier (1,000 requests/month).
|