control-zero 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- control_zero/__init__.py +31 -0
- control_zero/client.py +584 -0
- control_zero/integrations/crewai/__init__.py +53 -0
- control_zero/integrations/crewai/agent.py +267 -0
- control_zero/integrations/crewai/crew.py +381 -0
- control_zero/integrations/crewai/task.py +291 -0
- control_zero/integrations/crewai/tool.py +299 -0
- control_zero/integrations/langchain/__init__.py +58 -0
- control_zero/integrations/langchain/agent.py +311 -0
- control_zero/integrations/langchain/callbacks.py +441 -0
- control_zero/integrations/langchain/chain.py +319 -0
- control_zero/integrations/langchain/graph.py +441 -0
- control_zero/integrations/langchain/tool.py +271 -0
- control_zero/llm/__init__.py +77 -0
- control_zero/llm/anthropic/__init__.py +35 -0
- control_zero/llm/anthropic/client.py +136 -0
- control_zero/llm/anthropic/messages.py +375 -0
- control_zero/llm/base.py +551 -0
- control_zero/llm/cohere/__init__.py +32 -0
- control_zero/llm/cohere/client.py +402 -0
- control_zero/llm/gemini/__init__.py +34 -0
- control_zero/llm/gemini/client.py +486 -0
- control_zero/llm/groq/__init__.py +32 -0
- control_zero/llm/groq/client.py +330 -0
- control_zero/llm/mistral/__init__.py +32 -0
- control_zero/llm/mistral/client.py +319 -0
- control_zero/llm/ollama/__init__.py +31 -0
- control_zero/llm/ollama/client.py +439 -0
- control_zero/llm/openai/__init__.py +34 -0
- control_zero/llm/openai/chat.py +331 -0
- control_zero/llm/openai/client.py +182 -0
- control_zero/logging/__init__.py +5 -0
- control_zero/logging/async_logger.py +65 -0
- control_zero/mcp/__init__.py +5 -0
- control_zero/mcp/middleware.py +148 -0
- control_zero/policy/__init__.py +5 -0
- control_zero/policy/enforcer.py +99 -0
- control_zero/secrets/__init__.py +5 -0
- control_zero/secrets/manager.py +77 -0
- control_zero/types.py +51 -0
- control_zero-0.2.0.dist-info/METADATA +216 -0
- control_zero-0.2.0.dist-info/RECORD +44 -0
- control_zero-0.2.0.dist-info/WHEEL +4 -0
- control_zero-0.2.0.dist-info/licenses/LICENSE +17 -0
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Control Zero LLM Governance Module.
|
|
3
|
+
|
|
4
|
+
This module provides governance wrappers for vanilla LLM API calls,
|
|
5
|
+
enabling policy enforcement, cost tracking, and audit logging
|
|
6
|
+
without requiring agent frameworks like LangChain or CrewAI.
|
|
7
|
+
|
|
8
|
+
Supported providers:
|
|
9
|
+
- OpenAI (GPT-4, GPT-3.5, etc.)
|
|
10
|
+
- Anthropic (Claude)
|
|
11
|
+
- Google Gemini
|
|
12
|
+
- Groq
|
|
13
|
+
- Mistral
|
|
14
|
+
- Cohere
|
|
15
|
+
- Ollama (local LLMs)
|
|
16
|
+
|
|
17
|
+
Usage:
|
|
18
|
+
from control_zero import ControlZeroClient
|
|
19
|
+
from control_zero.llm.openai import GovernedOpenAI
|
|
20
|
+
from openai import OpenAI
|
|
21
|
+
|
|
22
|
+
# Initialize Control Zero client
|
|
23
|
+
cz_client = ControlZeroClient(api_key="cz_live_xxx")
|
|
24
|
+
cz_client.initialize()
|
|
25
|
+
|
|
26
|
+
# Wrap OpenAI client with governance
|
|
27
|
+
openai_client = OpenAI()
|
|
28
|
+
governed = GovernedOpenAI(client=openai_client, control_zero=cz_client)
|
|
29
|
+
|
|
30
|
+
# All calls are now governed
|
|
31
|
+
response = governed.chat.completions.create(
|
|
32
|
+
model="gpt-4",
|
|
33
|
+
messages=[{"role": "user", "content": "Hello"}]
|
|
34
|
+
)
|
|
35
|
+
"""
|
|
36
|
+
|
|
37
|
+
from control_zero.llm.base import (
|
|
38
|
+
GovernedLLM,
|
|
39
|
+
LLMGovernanceConfig,
|
|
40
|
+
LLMUsageMetrics,
|
|
41
|
+
ModelPolicy,
|
|
42
|
+
CostPolicy,
|
|
43
|
+
FunctionPolicy,
|
|
44
|
+
ContentPolicy,
|
|
45
|
+
GovernanceAction,
|
|
46
|
+
estimate_cost,
|
|
47
|
+
)
|
|
48
|
+
|
|
49
|
+
# Provider-specific imports
|
|
50
|
+
from control_zero.llm.openai import GovernedOpenAI
|
|
51
|
+
from control_zero.llm.anthropic import GovernedAnthropic
|
|
52
|
+
from control_zero.llm.gemini import GovernedGemini
|
|
53
|
+
from control_zero.llm.groq import GovernedGroq
|
|
54
|
+
from control_zero.llm.mistral import GovernedMistral
|
|
55
|
+
from control_zero.llm.cohere import GovernedCohere
|
|
56
|
+
from control_zero.llm.ollama import GovernedOllama
|
|
57
|
+
|
|
58
|
+
__all__ = [
|
|
59
|
+
# Base classes
|
|
60
|
+
"GovernedLLM",
|
|
61
|
+
"LLMGovernanceConfig",
|
|
62
|
+
"LLMUsageMetrics",
|
|
63
|
+
"ModelPolicy",
|
|
64
|
+
"CostPolicy",
|
|
65
|
+
"FunctionPolicy",
|
|
66
|
+
"ContentPolicy",
|
|
67
|
+
"GovernanceAction",
|
|
68
|
+
"estimate_cost",
|
|
69
|
+
# Provider wrappers
|
|
70
|
+
"GovernedOpenAI",
|
|
71
|
+
"GovernedAnthropic",
|
|
72
|
+
"GovernedGemini",
|
|
73
|
+
"GovernedGroq",
|
|
74
|
+
"GovernedMistral",
|
|
75
|
+
"GovernedCohere",
|
|
76
|
+
"GovernedOllama",
|
|
77
|
+
]
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Control Zero Anthropic Governance Wrapper.
|
|
3
|
+
|
|
4
|
+
This module provides governance wrappers for the Anthropic Python SDK,
|
|
5
|
+
enabling policy enforcement, cost tracking, and audit logging for
|
|
6
|
+
all Claude API calls.
|
|
7
|
+
|
|
8
|
+
Usage:
|
|
9
|
+
from control_zero import ControlZeroClient
|
|
10
|
+
from control_zero.llm.anthropic import GovernedAnthropic
|
|
11
|
+
from anthropic import Anthropic
|
|
12
|
+
|
|
13
|
+
# Initialize Control Zero
|
|
14
|
+
cz_client = ControlZeroClient(api_key="cz_live_xxx")
|
|
15
|
+
cz_client.initialize()
|
|
16
|
+
|
|
17
|
+
# Wrap Anthropic client with governance
|
|
18
|
+
anthropic_client = Anthropic()
|
|
19
|
+
governed = GovernedAnthropic(client=anthropic_client, control_zero=cz_client)
|
|
20
|
+
|
|
21
|
+
# All calls are now governed
|
|
22
|
+
response = governed.messages.create(
|
|
23
|
+
model="claude-3-sonnet-20240229",
|
|
24
|
+
max_tokens=1024,
|
|
25
|
+
messages=[{"role": "user", "content": "Hello"}]
|
|
26
|
+
)
|
|
27
|
+
"""
|
|
28
|
+
|
|
29
|
+
from control_zero.llm.anthropic.client import GovernedAnthropic
|
|
30
|
+
from control_zero.llm.anthropic.messages import GovernedMessages
|
|
31
|
+
|
|
32
|
+
__all__ = [
|
|
33
|
+
"GovernedAnthropic",
|
|
34
|
+
"GovernedMessages",
|
|
35
|
+
]
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Governed Anthropic client wrapper.
|
|
3
|
+
|
|
4
|
+
Provides governance features for the Anthropic Python SDK including:
|
|
5
|
+
- Model access control
|
|
6
|
+
- Cost tracking and limits
|
|
7
|
+
- Tool use governance
|
|
8
|
+
- PII detection and masking
|
|
9
|
+
- Audit logging
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from typing import Any, Dict, Optional
|
|
13
|
+
|
|
14
|
+
from control_zero.llm.base import (
|
|
15
|
+
GovernedLLM,
|
|
16
|
+
LLMGovernanceConfig,
|
|
17
|
+
)
|
|
18
|
+
from control_zero.llm.anthropic.messages import GovernedMessages
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class GovernedAnthropic(GovernedLLM):
    """
    Anthropic SDK client with Control Zero governance layered on top.

    The wrapper keeps a reference to an Anthropic client and exposes its
    messages API through a ``GovernedMessages`` namespace. The legacy
    ``completions`` endpoint is returned from the wrapped client as-is.

    Example:
        from control_zero import ControlZeroClient
        from control_zero.llm.anthropic import GovernedAnthropic
        from anthropic import Anthropic

        cz = ControlZeroClient(api_key="...")
        cz.initialize()

        client = Anthropic()
        governed = GovernedAnthropic(client=client, control_zero=cz)

        # Configure governance
        from control_zero.llm import LLMGovernanceConfig, ModelPolicy, CostPolicy

        governed = GovernedAnthropic(
            client=client,
            control_zero=cz,
            config=LLMGovernanceConfig(
                model_policy=ModelPolicy(
                    allowed_models=["claude-3-sonnet", "claude-3-haiku"],
                    max_tokens_per_request=4096
                ),
                cost_policy=CostPolicy(
                    max_cost_per_day=10.00,
                    max_requests_per_day=1000
                )
            ),
            user_context={"user_id": "user_123", "role": "developer"}
        )

        # Make governed API call
        response = governed.messages.create(
            model="claude-3-sonnet-20240229",
            max_tokens=1024,
            messages=[{"role": "user", "content": "Hello!"}]
        )
    """

    def __init__(
        self,
        client: Any,  # anthropic.Anthropic
        control_zero: Any,  # ControlZeroClient
        config: Optional[LLMGovernanceConfig] = None,
        user_context: Optional[Dict[str, Any]] = None,
    ):
        """
        Build a governed wrapper around an Anthropic client.

        Args:
            client: The Anthropic client instance to wrap
            control_zero: Control Zero client used for policy and logging
            config: Governance configuration (forwarded to the base class)
            user_context: Information about the current user (forwarded to
                the base class)
        """
        super().__init__(client, control_zero, config, user_context)

        # The governed namespace needs the fully initialised wrapper, so it
        # is created only after the base-class constructor has run.
        self._messages = GovernedMessages(self)

    def _respawn(
        self,
        config: Optional[LLMGovernanceConfig],
        user_context: Optional[Dict[str, Any]],
    ) -> "GovernedAnthropic":
        """Return a fresh GovernedAnthropic sharing this client and Control Zero handle."""
        # NOTE(review): _client, _cz, _config and _user_context are presumably
        # assigned by GovernedLLM.__init__ -- confirm against the base class.
        return GovernedAnthropic(
            client=self._client,
            control_zero=self._cz,
            config=config,
            user_context=user_context,
        )

    @property
    def provider_name(self) -> str:
        """Provider identifier for this wrapper: always ``"anthropic"``."""
        return "anthropic"

    @property
    def messages(self) -> GovernedMessages:
        """The governed messages namespace created in ``__init__``."""
        return self._messages

    @property
    def completions(self):
        """Legacy completions endpoint of the wrapped client, returned unchanged (pass-through)."""
        return self._client.completions

    def with_user_context(self, user_context: Dict[str, Any]) -> "GovernedAnthropic":
        """
        Derive a new governed client whose user context includes ``user_context``.

        Handy for attaching per-request user details in multi-tenant
        applications. Keys supplied here win over keys already present in the
        current context; this instance is left as it is.

        Args:
            user_context: Entries to merge over the existing user context

        Returns:
            A new GovernedAnthropic instance carrying the merged context
        """
        combined = {**self._user_context, **user_context}
        return self._respawn(self._config, combined)

    def with_config(self, config: LLMGovernanceConfig) -> "GovernedAnthropic":
        """
        Derive a new governed client that uses ``config`` instead of the current one.

        Args:
            config: Replacement governance configuration

        Returns:
            A new GovernedAnthropic instance with the given config and this
            instance's user context
        """
        return self._respawn(config, self._user_context)
|
|
@@ -0,0 +1,375 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Governed Anthropic Messages API.
|
|
3
|
+
|
|
4
|
+
Provides governance wrapper for Anthropic's messages API,
|
|
5
|
+
including support for tool use and streaming.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import time
|
|
9
|
+
from typing import Any, Dict, Iterator, List, Optional, Union
|
|
10
|
+
|
|
11
|
+
from control_zero.llm.base import (
|
|
12
|
+
GovernanceAction,
|
|
13
|
+
GovernedChatMixin,
|
|
14
|
+
LLMUsageMetrics,
|
|
15
|
+
estimate_cost,
|
|
16
|
+
)
|
|
17
|
+
from control_zero.policy import PolicyDeniedError
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class GovernedMessages(GovernedChatMixin):
    """
    Governed wrapper for Anthropic messages API.

    Supports:
    - Standard message creation
    - Streaming responses
    - Tool use
    - System prompts
    - Multi-modal content (text + images)
    """

    # Heuristics behind the character-based token estimate.
    _CHARS_PER_TOKEN = 4
    _IMAGE_TOKEN_ESTIMATE = 1600

    def __init__(self, governed_client: Any):  # GovernedAnthropic
        """
        Bind this namespace to its owning governed client.

        Args:
            governed_client: The GovernedAnthropic wrapper; its ``_client``
                attribute is the Anthropic client the requests are sent with.
        """
        self._governed = governed_client
        self._client = governed_client._client

    def create(
        self,
        *,
        model: str,
        max_tokens: int,
        messages: List[Dict[str, Any]],
        system: Optional[Union[str, List[Dict[str, Any]]]] = None,
        tools: Optional[List[Dict[str, Any]]] = None,
        tool_choice: Optional[Dict[str, Any]] = None,
        stream: bool = False,
        temperature: Optional[float] = None,
        top_p: Optional[float] = None,
        top_k: Optional[int] = None,
        stop_sequences: Optional[List[str]] = None,
        metadata: Optional[Dict[str, str]] = None,
        **kwargs,
    ) -> Any:
        """
        Create a governed message.

        All parameters match the Anthropic API, with added governance:
        - Pre-request checks run on the model, messages, tools and an
          estimated input token count
        - Messages are processed by the governance mixin
        - The system prompt is PII-masked when detection is enabled and the
          PII action is "mask"
        - A configured required system prompt is prepended (string and
          block-list system prompts are both handled)
        - Tools are filtered by the function policy
        - ``max_tokens`` is capped by the content policy's max output tokens
        - Usage metrics are recorded and the request is logged

        The caller's ``metadata`` dict is never modified; when the user
        context carries a ``user_id`` it is added to a copy (overriding any
        ``user_id`` the caller supplied).

        Args:
            model: Model to use (e.g., "claude-3-sonnet-20240229")
            max_tokens: Maximum tokens in response
            messages: List of message dicts with role and content
            system: System prompt (string or structured content)
            tools: Tool definitions for tool use
            tool_choice: How to handle tool selection; only forwarded when at
                least one tool survives filtering
            stream: Whether to stream the response
            temperature: Sampling temperature
            top_p: Nucleus sampling parameter
            top_k: Top-k sampling parameter
            stop_sequences: Stop sequences
            metadata: Request metadata
            **kwargs: Additional parameters passed to Anthropic

        Returns:
            Message response, or (when ``stream`` is true) a generator of
            stream events. The generator issues the API request only once
            iteration starts.

        Raises:
            PolicyDeniedError: If request violates governance policy
                (presumably raised by the pre-request checks -- it is
                re-raised here without being logged as an API error)
        """
        start_time = time.time()

        # Estimate tokens for governance checks
        estimated_input_tokens = self._estimate_message_tokens(messages)
        estimated_input_tokens += self._estimate_system_tokens(system)

        # Convert tools to standard format for checking
        tools_to_check = [
            {
                "name": tool.get("name", ""),
                "type": "function",
                "function": tool,
            }
            for tool in tools or []
        ]

        # Run pre-request governance checks
        self._governed._pre_request_checks(
            model=model,
            action=GovernanceAction.CHAT_COMPLETION,
            messages=messages,
            functions=tools_to_check,
            estimated_tokens=estimated_input_tokens,
        )

        # Process messages according to governance policies
        processed_messages = self._process_messages_for_governance(messages)

        # Mask PII in the system prompt when the policy asks for it
        processed_system = system
        if system and self._governed._config.content_policy.enable_pii_detection:
            if self._governed._config.content_policy.pii_action == "mask":
                processed_system = self._mask_system_prompt(system)

        # Add required system prompt if configured
        content_policy = self._governed._config.content_policy
        if content_policy.required_system_prompt:
            processed_system = self._with_required_system_prompt(
                processed_system, content_policy.required_system_prompt
            )

        # Filter tools according to policy
        filtered_tools = self._filter_tools_for_governance(tools)

        # Apply max_tokens limit from governance
        governed_max_tokens = max_tokens
        if content_policy.max_output_tokens:
            governed_max_tokens = min(max_tokens, content_policy.max_output_tokens)

        # Build request kwargs
        request_kwargs: Dict[str, Any] = {
            "model": model,
            "max_tokens": governed_max_tokens,
            "messages": processed_messages,
        }

        # Add optional parameters
        if processed_system is not None:
            request_kwargs["system"] = processed_system
        if filtered_tools is not None:
            request_kwargs["tools"] = filtered_tools
        # tool_choice is meaningless without tools, so it is dropped when
        # filtering removed every tool.
        if tool_choice is not None and filtered_tools:
            request_kwargs["tool_choice"] = tool_choice
        if temperature is not None:
            request_kwargs["temperature"] = temperature
        if top_p is not None:
            request_kwargs["top_p"] = top_p
        if top_k is not None:
            request_kwargs["top_k"] = top_k
        if stop_sequences is not None:
            request_kwargs["stop_sequences"] = stop_sequences
        if metadata is not None:
            request_kwargs["metadata"] = metadata

        # Add user_id to metadata if available. A copy is used so the dict
        # object passed in by the caller is left untouched.
        if self._governed._user_context.get("user_id"):
            merged_metadata = dict(metadata) if metadata is not None else {}
            merged_metadata["user_id"] = str(self._governed._user_context["user_id"])
            request_kwargs["metadata"] = merged_metadata

        # Add any extra kwargs
        request_kwargs.update(kwargs)

        # Handle streaming
        if stream:
            request_kwargs["stream"] = True
            return self._create_stream(request_kwargs, start_time, model)

        # Make the API call
        try:
            response = self._client.messages.create(**request_kwargs)
            latency_ms = self._elapsed_ms(start_time)

            # Extract usage metrics, falling back to the estimate when the
            # response carries no usage object
            usage = getattr(response, "usage", None)
            input_tokens = usage.input_tokens if usage else estimated_input_tokens
            output_tokens = usage.output_tokens if usage else 0
            total_tokens = input_tokens + output_tokens

            # Count tool uses
            tool_use_count = sum(
                1
                for block in response.content
                if getattr(block, "type", None) == "tool_use"
            )

            # cache_read_input_tokens may be present but None; treat that
            # the same as zero instead of comparing None with an int.
            cache_read_tokens = getattr(usage, "cache_read_input_tokens", 0) or 0

            # Record metrics
            metrics = LLMUsageMetrics(
                provider="anthropic",
                model=model,
                action=GovernanceAction.CHAT_COMPLETION,
                input_tokens=input_tokens,
                output_tokens=output_tokens,
                total_tokens=total_tokens,
                latency_ms=latency_ms,
                estimated_cost=estimate_cost(model, input_tokens, output_tokens),
                function_calls=tool_use_count,
                cached=cache_read_tokens > 0 if usage else False,
            )

            # Update tracking and log
            self._governed._post_request_update(metrics)
            self._governed._log_request(model, GovernanceAction.CHAT_COMPLETION, metrics)

            return response

        except PolicyDeniedError:
            # Policy denials are not API failures; propagate without logging
            raise
        except Exception as e:
            self._log_failure(model, start_time, e)
            raise

    def _create_stream(
        self,
        request_kwargs: Dict[str, Any],
        start_time: float,
        model: str,
    ) -> Iterator[Any]:
        """
        Create a governed streaming response.

        Opens ``messages.stream(...)`` on the wrapped client (the ``stream``
        key is stripped from the kwargs first), yields every event to the
        caller, and tracks token usage and tool-use blocks from the events.
        Metrics are recorded and logged once the stream is exhausted.

        NOTE(review): if the consumer stops iterating before the stream is
        exhausted, the generator is closed with GeneratorExit, which is not an
        ``Exception`` subclass -- neither the success nor the error branch
        runs, so nothing is recorded for that request.

        Args:
            request_kwargs: Fully built request arguments
            start_time: ``time.time()`` value captured when the call began
            model: Model name, used for metrics and logging

        Yields:
            Stream events exactly as produced by the Anthropic stream

        Raises:
            Exception: Any error raised while streaming or recording metrics
                is logged with status "error" and re-raised
        """
        input_tokens = 0
        output_tokens = 0
        tool_use_count = 0

        stream_kwargs = {k: v for k, v in request_kwargs.items() if k != "stream"}

        try:
            with self._client.messages.stream(**stream_kwargs) as stream:
                for event in stream:
                    # Track usage from events. Every attribute is read
                    # defensively: a missing or None usage object must not
                    # abort the caller's stream.
                    event_type = getattr(event, "type", None)
                    if event_type == "message_start":
                        usage = getattr(getattr(event, "message", None), "usage", None)
                        if usage is not None:
                            input_tokens = usage.input_tokens
                    elif event_type == "message_delta":
                        usage = getattr(event, "usage", None)
                        if usage is not None:
                            output_tokens = usage.output_tokens
                    elif event_type == "content_block_start":
                        content_block = getattr(event, "content_block", None)
                        if getattr(content_block, "type", None) == "tool_use":
                            tool_use_count += 1

                    yield event

            # Calculate final metrics
            metrics = LLMUsageMetrics(
                provider="anthropic",
                model=model,
                action=GovernanceAction.CHAT_COMPLETION,
                input_tokens=input_tokens,
                output_tokens=output_tokens,
                total_tokens=input_tokens + output_tokens,
                latency_ms=self._elapsed_ms(start_time),
                estimated_cost=estimate_cost(model, input_tokens, output_tokens),
                function_calls=tool_use_count,
            )

            self._governed._post_request_update(metrics)
            self._governed._log_request(model, GovernanceAction.CHAT_COMPLETION, metrics)

        except Exception as e:
            self._log_failure(model, start_time, e)
            raise

    def _filter_tools_for_governance(
        self,
        tools: Optional[List[Dict[str, Any]]],
    ) -> Optional[List[Dict[str, Any]]]:
        """
        Filter tools according to governance policies.

        Matching is a case-insensitive substring test of each policy entry
        against the tool name. A tool is dropped when any denied entry
        matches, or when an allow-list exists and no allowed entry matches.

        Args:
            tools: Tool definitions as passed to ``create``

        Returns:
            ``tools`` unchanged when it is empty/None or when the function
            policy has neither allowed nor denied entries; otherwise the
            list of surviving tools, or None when none survive.
        """
        if not tools:
            return tools

        policy = self._governed._config.function_policy
        if not policy.allowed_functions and not policy.denied_functions:
            return tools

        denied_names = [name.lower() for name in policy.denied_functions or ()]
        allowed_names = [name.lower() for name in policy.allowed_functions or ()]

        kept = []
        for tool in tools:
            # A missing or None name is treated as the empty string
            tool_name = (tool.get("name") or "").lower()

            # Skip denied
            if any(denied in tool_name for denied in denied_names):
                continue

            # Check allowed
            if allowed_names and not any(allowed in tool_name for allowed in allowed_names):
                continue

            kept.append(tool)

        return kept if kept else None

    def _estimate_message_tokens(self, messages: List[Dict[str, Any]]) -> int:
        """
        Estimate token count for messages.

        This is a rough estimate based on character count (about four
        characters per token). String content and ``text`` parts are counted
        by length, each ``image`` part is counted as roughly 1600 tokens, and
        every other part type is ignored.
        For accurate counts, use Anthropic's token counting API.

        Args:
            messages: Message dicts; only the ``content`` key is read

        Returns:
            Estimated token count, never less than 1
        """
        total_chars = 0
        for msg in messages:
            content = msg.get("content", "")
            if isinstance(content, str):
                total_chars += len(content)
            elif isinstance(content, list):
                # Multi-modal content
                for part in content:
                    if not isinstance(part, dict):
                        continue
                    part_type = part.get("type")
                    if part_type == "text":
                        total_chars += len(part.get("text") or "")
                    elif part_type == "image":
                        # Expressed in characters so it shares the final division
                        total_chars += self._IMAGE_TOKEN_ESTIMATE * self._CHARS_PER_TOKEN

        return max(1, total_chars // self._CHARS_PER_TOKEN)

    def _estimate_system_tokens(
        self,
        system: Optional[Union[str, List[Dict[str, Any]]]],
    ) -> int:
        """Estimate tokens of a system prompt (string or list of blocks) at ~4 chars per token."""
        if not system:
            return 0
        if isinstance(system, str):
            return len(system) // self._CHARS_PER_TOKEN
        if isinstance(system, list):
            # Each text block is floored separately; non-text blocks count as zero
            return sum(
                len(block.get("text") or "") // self._CHARS_PER_TOKEN
                for block in system
                if block.get("type") == "text"
            )
        return 0

    def _mask_system_prompt(
        self,
        system: Union[str, List[Dict[str, Any]]],
    ) -> Union[str, List[Dict[str, Any]]]:
        """Return the system prompt with PII masked in its text; the input is not modified."""
        if isinstance(system, str):
            return self._governed._mask_pii(system)
        if isinstance(system, list):
            masked_blocks = []
            for block in system:
                if block.get("type") == "text":
                    masked = block.copy()
                    masked["text"] = self._governed._mask_pii(block.get("text", ""))
                    masked_blocks.append(masked)
                else:
                    masked_blocks.append(block)
            return masked_blocks
        return system

    @staticmethod
    def _with_required_system_prompt(system: Any, required_prompt: str) -> Any:
        """Prepend the policy-required system prompt to a None, string or block-list system prompt."""
        if system is None:
            return required_prompt
        if isinstance(system, str):
            return f"{required_prompt}\n\n{system}"
        if isinstance(system, list):
            # Structured system prompts get the required text as a leading
            # text block; a new list is built so the caller's list is kept.
            return [{"type": "text", "text": required_prompt}, *system]
        return system

    @staticmethod
    def _elapsed_ms(start_time: float) -> int:
        """Whole milliseconds elapsed since ``start_time`` (a ``time.time()`` value)."""
        return int((time.time() - start_time) * 1000)

    def _log_failure(self, model: str, start_time: float, error: Exception) -> None:
        """Log a failed request with status "error", its latency and the error text."""
        metrics = LLMUsageMetrics(
            provider="anthropic",
            model=model,
            action=GovernanceAction.CHAT_COMPLETION,
            latency_ms=self._elapsed_ms(start_time),
        )
        self._governed._log_request(
            model, GovernanceAction.CHAT_COMPLETION, metrics,
            status="error", error=str(error)
        )
|