proxilion 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- proxilion/__init__.py +136 -0
- proxilion/audit/__init__.py +133 -0
- proxilion/audit/base_exporters.py +527 -0
- proxilion/audit/compliance/__init__.py +130 -0
- proxilion/audit/compliance/base.py +457 -0
- proxilion/audit/compliance/eu_ai_act.py +603 -0
- proxilion/audit/compliance/iso27001.py +544 -0
- proxilion/audit/compliance/soc2.py +491 -0
- proxilion/audit/events.py +493 -0
- proxilion/audit/explainability.py +1173 -0
- proxilion/audit/exporters/__init__.py +58 -0
- proxilion/audit/exporters/aws_s3.py +636 -0
- proxilion/audit/exporters/azure_storage.py +608 -0
- proxilion/audit/exporters/cloud_base.py +468 -0
- proxilion/audit/exporters/gcp_storage.py +570 -0
- proxilion/audit/exporters/multi_exporter.py +498 -0
- proxilion/audit/hash_chain.py +652 -0
- proxilion/audit/logger.py +543 -0
- proxilion/caching/__init__.py +49 -0
- proxilion/caching/tool_cache.py +633 -0
- proxilion/context/__init__.py +73 -0
- proxilion/context/context_window.py +556 -0
- proxilion/context/message_history.py +505 -0
- proxilion/context/session.py +735 -0
- proxilion/contrib/__init__.py +51 -0
- proxilion/contrib/anthropic.py +609 -0
- proxilion/contrib/google.py +1012 -0
- proxilion/contrib/langchain.py +641 -0
- proxilion/contrib/mcp.py +893 -0
- proxilion/contrib/openai.py +646 -0
- proxilion/core.py +3058 -0
- proxilion/decorators.py +966 -0
- proxilion/engines/__init__.py +287 -0
- proxilion/engines/base.py +266 -0
- proxilion/engines/casbin_engine.py +412 -0
- proxilion/engines/opa_engine.py +493 -0
- proxilion/engines/simple.py +437 -0
- proxilion/exceptions.py +887 -0
- proxilion/guards/__init__.py +54 -0
- proxilion/guards/input_guard.py +522 -0
- proxilion/guards/output_guard.py +634 -0
- proxilion/observability/__init__.py +198 -0
- proxilion/observability/cost_tracker.py +866 -0
- proxilion/observability/hooks.py +683 -0
- proxilion/observability/metrics.py +798 -0
- proxilion/observability/session_cost_tracker.py +1063 -0
- proxilion/policies/__init__.py +67 -0
- proxilion/policies/base.py +304 -0
- proxilion/policies/builtin.py +486 -0
- proxilion/policies/registry.py +376 -0
- proxilion/providers/__init__.py +201 -0
- proxilion/providers/adapter.py +468 -0
- proxilion/providers/anthropic_adapter.py +330 -0
- proxilion/providers/gemini_adapter.py +391 -0
- proxilion/providers/openai_adapter.py +294 -0
- proxilion/py.typed +0 -0
- proxilion/resilience/__init__.py +81 -0
- proxilion/resilience/degradation.py +615 -0
- proxilion/resilience/fallback.py +555 -0
- proxilion/resilience/retry.py +554 -0
- proxilion/scheduling/__init__.py +57 -0
- proxilion/scheduling/priority_queue.py +419 -0
- proxilion/scheduling/scheduler.py +459 -0
- proxilion/security/__init__.py +244 -0
- proxilion/security/agent_trust.py +968 -0
- proxilion/security/behavioral_drift.py +794 -0
- proxilion/security/cascade_protection.py +869 -0
- proxilion/security/circuit_breaker.py +428 -0
- proxilion/security/cost_limiter.py +690 -0
- proxilion/security/idor_protection.py +460 -0
- proxilion/security/intent_capsule.py +849 -0
- proxilion/security/intent_validator.py +495 -0
- proxilion/security/memory_integrity.py +767 -0
- proxilion/security/rate_limiter.py +509 -0
- proxilion/security/scope_enforcer.py +680 -0
- proxilion/security/sequence_validator.py +636 -0
- proxilion/security/trust_boundaries.py +784 -0
- proxilion/streaming/__init__.py +70 -0
- proxilion/streaming/detector.py +761 -0
- proxilion/streaming/transformer.py +674 -0
- proxilion/timeouts/__init__.py +55 -0
- proxilion/timeouts/decorators.py +477 -0
- proxilion/timeouts/manager.py +545 -0
- proxilion/tools/__init__.py +69 -0
- proxilion/tools/decorators.py +493 -0
- proxilion/tools/registry.py +732 -0
- proxilion/types.py +339 -0
- proxilion/validation/__init__.py +93 -0
- proxilion/validation/pydantic_schema.py +351 -0
- proxilion/validation/schema.py +651 -0
- proxilion-0.0.1.dist-info/METADATA +872 -0
- proxilion-0.0.1.dist-info/RECORD +94 -0
- proxilion-0.0.1.dist-info/WHEEL +4 -0
- proxilion-0.0.1.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,674 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Stream transformation and filtering for LLM responses.
|
|
3
|
+
|
|
4
|
+
Provides utilities for filtering, transforming, and validating
|
|
5
|
+
streaming content before it reaches the client.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import asyncio
import re
import threading
from collections.abc import AsyncIterator, Callable, Iterator
from dataclasses import dataclass, field
from typing import Any, Generic, Protocol, TypeVar

from proxilion.streaming.detector import (
    DetectedToolCall,
    StreamEvent,
    StreamEventType,
    StreamingToolCallDetector,
)
|
|
22
|
+
|
|
23
|
+
T = TypeVar("T")
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class StreamFilter(Protocol):
    """Callable interface for per-chunk content filters."""

    def __call__(self, content: str) -> str | None:
        """
        Apply this filter to a chunk of content.

        Args:
            content: The text chunk to filter.

        Returns:
            The (possibly modified) text to pass along, or None to
            drop the chunk entirely.
        """
        ...
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class StreamValidator(Protocol):
    """Callable interface for per-chunk stream validators."""

    def __call__(self, content: str) -> bool:
        """
        Validate a chunk of content.

        Args:
            content: The text chunk to validate.

        Returns:
            True to let the stream continue, False to stop it.
        """
        ...
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
class ToolCallAuthorizer(Protocol):
    """Callable interface for authorizing detected tool calls."""

    def __call__(self, tool_call: DetectedToolCall) -> bool:
        """
        Decide whether a detected tool call may proceed.

        Args:
            tool_call: The tool call detected in the stream.

        Returns:
            True when the call is authorized, False otherwise.
        """
        ...
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
@dataclass
class FilteredStream(Generic[T]):
    """
    Async-iterator wrapper that runs validators and filters over a stream.

    Attributes:
        source: The wrapped async iterator.
        filters: Filter callables applied in order; a None result drops the chunk.
        validators: Validator callables; a False result halts the stream.
        stopped: True once the stream has been halted.
    """

    source: AsyncIterator[T]
    filters: list[StreamFilter] = field(default_factory=list)
    validators: list[StreamValidator] = field(default_factory=list)
    stopped: bool = False
    _buffer: list[T] = field(default_factory=list)

    def __aiter__(self) -> AsyncIterator[T]:
        return self

    async def __anext__(self) -> T:
        if self.stopped:
            raise StopAsyncIteration

        while True:
            chunk = await self.source.__anext__()

            # Derive a text view of the chunk for validation and filtering.
            if isinstance(chunk, str):
                text = chunk
            elif hasattr(chunk, "content"):
                text = getattr(chunk, "content", "")
            else:
                text = str(chunk)

            # Validators run in order; the first rejection halts iteration
            # permanently.
            if not all(validator(text) for validator in self.validators):
                self.stopped = True
                raise StopAsyncIteration

            filtered: str | None = text
            for fn in self.filters:
                filtered = fn(filtered)
                if filtered is None:
                    break  # chunk dropped; pull the next one

            if filtered is None:
                continue

            # String chunks carry the filtered text back to the caller;
            # structured chunks are returned unchanged.
            if isinstance(chunk, str):
                return filtered  # type: ignore
            return chunk

    def stop(self) -> None:
        """Halt iteration; later __anext__ calls raise StopAsyncIteration."""
        self.stopped = True
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
class StreamTransformer:
    """
    Transform streaming content with filters and validators.

    Supports both string streams and structured chunk streams,
    applying filters and validators to control content flow.

    Example:
        >>> transformer = StreamTransformer()
        >>> transformer.add_filter(redact_pii)
        >>> transformer.add_filter(block_sensitive_output)
        >>>
        >>> async for chunk in transformer.transform(original_stream):
        ...     yield chunk  # Filtered content

    Example with tool call authorization:
        >>> transformer = StreamTransformer()
        >>> transformer.set_tool_call_authorizer(my_authorizer)
        >>>
        >>> async for event in transformer.transform_events(detector_events):
        ...     if event.type == StreamEventType.TOOL_CALL_END:
        ...         # Tool call was authorized
        ...         execute_tool(event.tool_call)
    """

    def __init__(self) -> None:
        """Initialize the transformer with no filters, validators, or authorizer."""
        self._filters: list[StreamFilter] = []
        self._validators: list[StreamValidator] = []
        self._tool_call_authorizer: ToolCallAuthorizer | None = None
        self._event_callbacks: list[Callable[[StreamEvent], None]] = []
        self._lock = threading.Lock()

    def add_filter(self, filter_fn: StreamFilter) -> StreamTransformer:
        """
        Add a content filter.

        Filters are applied in order. Return None to drop a chunk.

        Args:
            filter_fn: Function that takes content and returns filtered content or None.

        Returns:
            Self for chaining.
        """
        with self._lock:
            self._filters.append(filter_fn)
        return self

    def add_validator(self, validator_fn: StreamValidator) -> StreamTransformer:
        """
        Add a validator.

        Validators are checked before filters. Return False to stop the stream.

        Args:
            validator_fn: Function that takes content and returns bool.

        Returns:
            Self for chaining.
        """
        with self._lock:
            self._validators.append(validator_fn)
        return self

    def set_tool_call_authorizer(
        self, authorizer: ToolCallAuthorizer
    ) -> StreamTransformer:
        """
        Set the tool call authorizer.

        Args:
            authorizer: Function that authorizes tool calls.

        Returns:
            Self for chaining.
        """
        # Fix: take the lock like every other mutator so configuration
        # from another thread is consistently synchronized.
        with self._lock:
            self._tool_call_authorizer = authorizer
        return self

    def add_event_callback(
        self, callback: Callable[[StreamEvent], None]
    ) -> StreamTransformer:
        """
        Add an event callback.

        Callbacks are invoked for each stream event.

        Args:
            callback: Function called with each StreamEvent.

        Returns:
            Self for chaining.
        """
        with self._lock:
            self._event_callbacks.append(callback)
        return self

    def clear_filters(self) -> None:
        """Remove all filters."""
        with self._lock:
            self._filters.clear()

    def clear_validators(self) -> None:
        """Remove all validators."""
        with self._lock:
            self._validators.clear()

    def _passes_validators(self, content: str) -> bool:
        """Return True iff every registered validator accepts content.

        Validators run in registration order and short-circuit on the
        first rejection, matching the previous inline loops.
        """
        return all(validator(content) for validator in self._validators)

    def _apply_filters(self, content: str) -> str | None:
        """Run content through all filters in order.

        Returns the filtered content, or None when some filter dropped
        the chunk (remaining filters are skipped).
        """
        result: str | None = content
        for filter_fn in self._filters:
            if result is None:
                break
            result = filter_fn(result)
        return result

    async def transform(
        self,
        stream: AsyncIterator[str],
    ) -> AsyncIterator[str]:
        """
        Transform a string stream with all registered filters.

        Args:
            stream: The source async iterator of strings.

        Yields:
            Filtered string chunks. The stream ends early (silently) when
            a validator rejects a chunk.
        """
        async for chunk in stream:
            if not self._passes_validators(chunk):
                return  # Stop stream
            result = self._apply_filters(chunk)
            if result is not None:
                yield result

    def transform_sync(
        self,
        stream: Iterator[str],
    ) -> Iterator[str]:
        """
        Transform a synchronous string stream.

        Args:
            stream: The source iterator of strings.

        Yields:
            Filtered string chunks. The stream ends early (silently) when
            a validator rejects a chunk.
        """
        for chunk in stream:
            if not self._passes_validators(chunk):
                return  # Stop stream
            result = self._apply_filters(chunk)
            if result is not None:
                yield result

    async def transform_events(
        self,
        stream: AsyncIterator[StreamEvent],
    ) -> AsyncIterator[StreamEvent]:
        """
        Transform a stream of StreamEvents.

        Applies filters to TEXT events and authorization to tool calls.
        Non-TEXT, non-tool-call events pass through unchanged.

        Args:
            stream: The source async iterator of StreamEvents.

        Yields:
            Transformed StreamEvent objects.
        """
        async for event in stream:
            # Callbacks observe every event, including ones later dropped.
            for callback in self._event_callbacks:
                callback(event)

            if event.type == StreamEventType.TEXT:
                content = event.content or ""
                if not self._passes_validators(content):
                    return  # Stop stream
                result = self._apply_filters(content)
                if result is not None:
                    yield StreamEvent.text(result, event.raw_chunk)

            elif event.type == StreamEventType.TOOL_CALL_END:
                # Check authorization for tool calls.
                if self._tool_call_authorizer and event.tool_call:
                    if not self._tool_call_authorizer(event.tool_call):
                        # Tool call not authorized - emit error instead.
                        yield StreamEvent.error_event(
                            f"Tool call '{event.tool_call.name}' not authorized",
                            event.raw_chunk,
                        )
                        continue
                yield event

            else:
                # Pass through other events.
                yield event

    async def transform_chunks(
        self,
        stream: AsyncIterator[Any],
        detector: StreamingToolCallDetector | None = None,
    ) -> AsyncIterator[StreamEvent]:
        """
        Transform raw LLM chunks into StreamEvents.

        Optionally detects tool calls and applies filters.

        Args:
            stream: Raw LLM streaming chunks.
            detector: Optional tool call detector. Creates one if not provided.

        Yields:
            StreamEvent objects.
        """
        if detector is None:
            detector = StreamingToolCallDetector()

        async for chunk in stream:
            events = detector.process_chunk(chunk)

            for event in events:
                # Route each detected event through transform_events so
                # TEXT filtering and tool-call authorization apply.
                async for transformed in self.transform_events(
                    _single_event_iterator(event)
                ):
                    yield transformed

    def wrap(self, stream: AsyncIterator[str]) -> FilteredStream[str]:
        """
        Wrap a stream with the transformer's filters.

        Args:
            stream: The source async iterator.

        Returns:
            A FilteredStream carrying snapshots of the current filters
            and validators (later additions do not affect it).
        """
        return FilteredStream(
            source=stream,
            filters=list(self._filters),
            validators=list(self._validators),
        )
|
|
417
|
+
|
|
418
|
+
|
|
419
|
+
async def _single_event_iterator(event: StreamEvent) -> AsyncIterator[StreamEvent]:
    """Wrap one event in a single-shot async iterator."""
    yield event
|
|
422
|
+
|
|
423
|
+
|
|
424
|
+
def create_guarded_stream(
    stream: AsyncIterator[str],
    output_guard: Any,  # OutputGuard from guards module
) -> AsyncIterator[str]:
    """
    Create a stream that's filtered by output guards.

    Integrates with the OutputGuard from proxilion.guards to
    filter sensitive content from streaming responses.

    Args:
        stream: The source async iterator of strings.
        output_guard: An OutputGuard instance for content filtering.

    Returns:
        Async iterator yielding filtered content.

    Example:
        >>> from proxilion.guards import OutputGuard
        >>> guard = OutputGuard()
        >>> async for chunk in create_guarded_stream(llm_stream, guard):
        ...     # Chunks are checked for sensitive data
        ...     ws.send(chunk)
    """
    # Local import avoids a circular dependency with proxilion.guards.
    try:
        from proxilion.guards import GuardAction
    except ImportError:
        # Minimal stand-in when the guards module is unavailable.
        class GuardAction:  # type: ignore[no-redef]
            ALLOW = "allow"
            BLOCK = "block"
            SANITIZE = "sanitize"

    def guard_filter(chunk: str) -> str | None:
        verdict = output_guard.check(chunk)
        # Results without an "action" attribute pass through untouched.
        action = getattr(verdict, "action", None)
        if action == GuardAction.BLOCK:
            return None
        if action == GuardAction.SANITIZE:
            return output_guard.redact(chunk)
        return chunk

    transformer = StreamTransformer()
    transformer.add_filter(guard_filter)
    return transformer.transform(stream)
|
|
471
|
+
|
|
472
|
+
|
|
473
|
+
def create_authorization_stream(
    stream: AsyncIterator[Any],
    authorizer: Callable[[DetectedToolCall], bool],
    detector: StreamingToolCallDetector | None = None,
) -> AsyncIterator[StreamEvent]:
    """
    Create a stream that authorizes tool calls.

    Processes raw LLM chunks, detects tool calls, and applies
    authorization before yielding events.

    Args:
        stream: Raw LLM streaming chunks.
        authorizer: Function to authorize tool calls.
        detector: Optional detector instance.

    Returns:
        Async iterator yielding StreamEvents.

    Example:
        >>> def my_authorizer(tool_call):
        ...     return auth.can(user, "execute", tool_call.name)
        >>>
        >>> async for event in create_authorization_stream(llm_stream, my_authorizer):
        ...     if event.type == StreamEventType.TOOL_CALL_END:
        ...         # Tool call is authorized
        ...         result = execute_tool(event.tool_call)
    """
    transformer = StreamTransformer().set_tool_call_authorizer(authorizer)
    return transformer.transform_chunks(stream, detector)
|
|
504
|
+
|
|
505
|
+
|
|
506
|
+
class BufferedStreamTransformer:
    r"""
    Stream transformer that buffers content for pattern matching.

    Useful when you need to detect patterns that may span multiple chunks:
    content accumulates until ``buffer_size`` characters are available,
    patterns are applied, and a small unprocessed tail is retained so a
    pattern straddling the emit boundary can still match later.

    NOTE(review): when a replacement changes the text length near the
    retained tail, the emitted (replaced) prefix and the retained
    (unreplaced) tail can misalign; same-length replacements are always
    safe — confirm whether variable-length patterns need exact output.

    Example:
        >>> transformer = BufferedStreamTransformer(buffer_size=1000)
        >>> transformer.add_pattern_filter(r"API_KEY_\w+", "[REDACTED]")
        >>>
        >>> async for chunk in transformer.transform(stream):
        ...     yield chunk
    """

    def __init__(self, buffer_size: int = 500) -> None:
        """
        Initialize the buffered transformer.

        Args:
            buffer_size: Number of buffered characters that triggers
                pattern processing and emission.
        """
        self.buffer_size = buffer_size
        self._buffer: str = ""
        # (compiled pattern, replacement) pairs, applied in order.
        self._patterns: list[tuple[re.Pattern[str], str]] = []
        self._lock = threading.Lock()

    def add_pattern_filter(
        self, pattern: str, replacement: str
    ) -> BufferedStreamTransformer:
        """
        Add a regex pattern filter.

        Args:
            pattern: Regex pattern to match.
            replacement: Replacement string.

        Raises:
            re.error: If the pattern is not a valid regex.

        Returns:
            Self for chaining.
        """
        # Compile once here (this also validates the pattern) instead of
        # re-parsing the pattern string on every buffer flush.
        compiled = re.compile(pattern)
        with self._lock:
            self._patterns.append((compiled, replacement))
        return self

    async def transform(self, stream: AsyncIterator[str]) -> AsyncIterator[str]:
        """
        Transform stream with buffered pattern matching.

        Args:
            stream: Source async iterator.

        Yields:
            Filtered content.
        """
        async for chunk in stream:
            self._buffer += chunk

            # Only process once enough content has accumulated.
            if len(self._buffer) < self.buffer_size:
                continue

            result = self._buffer
            for pattern, replacement in self._patterns:
                result = pattern.sub(replacement, result)

            # Keep a small unprocessed tail so patterns spanning the emit
            # boundary can match on a later pass.
            overlap = min(100, len(result) // 2)
            if overlap:
                yield result[:-overlap]
                self._buffer = self._buffer[-overlap:]
            else:
                # Fix: with overlap == 0, result[:-0] would yield "" and
                # buffer[-0:] would keep the WHOLE buffer, so the buffer
                # never drained and only empty chunks were emitted.
                yield result
                self._buffer = ""

        # Flush remaining buffer.
        if self._buffer:
            result = self._buffer
            for pattern, replacement in self._patterns:
                result = pattern.sub(replacement, result)
            yield result
            self._buffer = ""

    def reset(self) -> None:
        """Reset the buffer, discarding any unemitted content."""
        with self._lock:
            self._buffer = ""
|
|
592
|
+
|
|
593
|
+
|
|
594
|
+
class StreamAggregator:
    """
    Aggregate streaming content for batch processing.

    Collects chunks until one of the flush conditions fires, then yields
    the aggregated content:

    * the configured delimiter appears in the buffer,
    * the buffer reaches ``max_chars``, or
    * the buffer holds at least ``min_chars`` and ``timeout`` seconds
      have elapsed since the last yield.

    NOTE: conditions are only evaluated when a new chunk arrives — there
    is no background timer — so a quiet stream does not flush on timeout
    until its next chunk (or until the source ends, which always flushes).

    Example:
        >>> aggregator = StreamAggregator(
        ...     min_chars=100,
        ...     timeout=1.0,
        ... )
        >>> async for batch in aggregator.aggregate(stream):
        ...     process_batch(batch)
    """

    def __init__(
        self,
        min_chars: int = 0,
        max_chars: int = 10000,
        timeout: float = 0.5,
        delimiter: str | None = None,
    ) -> None:
        """
        Initialize the aggregator.

        Args:
            min_chars: Minimum characters before yielding.
            max_chars: Maximum characters to buffer.
            timeout: Timeout in seconds before yielding.
            delimiter: Optional delimiter that triggers yield.
        """
        self.min_chars = min_chars
        self.max_chars = max_chars
        self.timeout = timeout
        self.delimiter = delimiter
        self._buffer: str = ""

    async def aggregate(self, stream: AsyncIterator[str]) -> AsyncIterator[str]:
        """
        Aggregate streaming content.

        Args:
            stream: Source async iterator.

        Yields:
            Aggregated batches.
        """
        # Fix: use get_running_loop() — the supported way to obtain the
        # loop inside a coroutine — and fetch it once instead of calling
        # the deprecated get_event_loop() on every chunk.
        loop = asyncio.get_running_loop()
        last_yield = loop.time()

        async for chunk in stream:
            self._buffer += chunk
            now = loop.time()

            should_yield = False

            # A delimiter anywhere in the buffer triggers a flush.
            if self.delimiter and self.delimiter in self._buffer:
                should_yield = True

            # Hard cap on buffered size.
            if len(self._buffer) >= self.max_chars:
                should_yield = True

            # Enough content and enough elapsed time since the last batch.
            if len(self._buffer) >= self.min_chars and (now - last_yield) >= self.timeout:
                should_yield = True

            if should_yield and self._buffer:
                yield self._buffer
                self._buffer = ""
                last_yield = now

        # Flush remaining content once the source is exhausted.
        if self._buffer:
            yield self._buffer
            self._buffer = ""

    def reset(self) -> None:
        """Reset the buffer, discarding any unemitted content."""
        self._buffer = ""
|