agentops-cockpit 0.5.0__py3-none-any.whl → 0.9.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent_ops_cockpit/agent.py +137 -0
- agent_ops_cockpit/cli/main.py +104 -11
- agent_ops_cockpit/eval/load_test.py +15 -10
- agent_ops_cockpit/eval/quality_climber.py +23 -5
- agent_ops_cockpit/eval/red_team.py +5 -4
- agent_ops_cockpit/mcp_server.py +55 -21
- agent_ops_cockpit/ops/arch_review.py +78 -17
- agent_ops_cockpit/ops/cost_optimizer.py +0 -1
- agent_ops_cockpit/ops/evidence_bridge.py +132 -0
- agent_ops_cockpit/ops/frameworks.py +79 -10
- agent_ops_cockpit/ops/mcp_hub.py +1 -2
- agent_ops_cockpit/ops/orchestrator.py +363 -49
- agent_ops_cockpit/ops/pii_scrubber.py +1 -1
- agent_ops_cockpit/ops/policies.json +26 -0
- agent_ops_cockpit/ops/policy_engine.py +85 -0
- agent_ops_cockpit/ops/reliability.py +30 -10
- agent_ops_cockpit/ops/secret_scanner.py +10 -3
- agent_ops_cockpit/ops/ui_auditor.py +52 -11
- agent_ops_cockpit/ops/watcher.py +138 -0
- agent_ops_cockpit/ops/watchlist.json +88 -0
- agent_ops_cockpit/optimizer.py +361 -53
- agent_ops_cockpit/shadow/router.py +7 -8
- agent_ops_cockpit/system_prompt.md +13 -0
- agent_ops_cockpit/tests/golden_set.json +52 -0
- agent_ops_cockpit/tests/test_agent.py +34 -0
- agent_ops_cockpit/tests/test_arch_review.py +45 -0
- agent_ops_cockpit/tests/test_frameworks.py +100 -0
- agent_ops_cockpit/tests/test_optimizer.py +68 -0
- agent_ops_cockpit/tests/test_quality_climber.py +18 -0
- agent_ops_cockpit/tests/test_red_team.py +35 -0
- agent_ops_cockpit/tests/test_secret_scanner.py +24 -0
- agentops_cockpit-0.9.5.dist-info/METADATA +246 -0
- agentops_cockpit-0.9.5.dist-info/RECORD +47 -0
- {agentops_cockpit-0.5.0.dist-info → agentops_cockpit-0.9.5.dist-info}/entry_points.txt +1 -1
- agentops_cockpit-0.5.0.dist-info/METADATA +0 -171
- agentops_cockpit-0.5.0.dist-info/RECORD +0 -32
- {agentops_cockpit-0.5.0.dist-info → agentops_cockpit-0.9.5.dist-info}/WHEEL +0 -0
- {agentops_cockpit-0.5.0.dist-info → agentops_cockpit-0.9.5.dist-info}/licenses/LICENSE +0 -0
agent_ops_cockpit/optimizer.py
CHANGED
@@ -1,88 +1,346 @@
-import
+from __future__ import annotations
 import os
 import re
-import
-from typing import List, Dict
+from typing import List, Dict, Any
 import typer
 from rich.console import Console
 from rich.table import Table
 from rich.panel import Panel
 from rich.syntax import Syntax
+from packaging import version
+
+# Import the evidence bridge
+try:
+    from agent_ops_cockpit.ops.evidence_bridge import get_package_evidence, get_compatibility_report
+except ImportError:
+    # Fallback for local execution
+    try:
+        from backend.ops.evidence_bridge import get_package_evidence, get_compatibility_report
+    except ImportError:
+        # Final fallback
+        def get_package_evidence(pkg): return {}
+        def get_compatibility_report(imports): return []
 
 app = typer.Typer(help="AgentOps Cockpit: The Agent Optimizer CLI")
 console = Console()
 
 class OptimizationIssue:
-    def __init__(self, id: str, title: str, impact: str, savings: str, description: str, diff: str, fix_pattern: str = None):
+    def __init__(self, id: str, title: str, impact: str, savings: str, description: str, diff: str, package: str = None, fix_pattern: str = None):
         self.id = id
         self.title = title
         self.impact = impact
         self.savings = savings
         self.description = description
         self.diff = diff
+        self.package = package
         self.fix_pattern = fix_pattern
+        self.evidence = None
 
-def analyze_code(content: str, file_path: str = "agent.py") -> List[OptimizationIssue]:
+def analyze_code(content: str, file_path: str = "agent.py", versions: Dict[str, str] = None) -> List[OptimizationIssue]:
     issues = []
     content_lower = content.lower()
+    versions = versions or {}
+
+    # --- SITUATIONAL PLATFORM OPTIMIZATIONS ---
+
+    v_ai = versions.get("google-cloud-aiplatform", "Not Installed")
+    if "google.cloud.aiplatform" in content_lower or "vertexai" in content_lower:
+        if v_ai == "Not Installed":
+            issues.append(OptimizationIssue(
+                "vertex_install", "Install Modern Vertex SDK", "HIGH", "90% cost savings",
+                "You appear to be using Vertex AI logic but the SDK is not in your environment. Install v1.70.0+ to unlock context caching.",
+                "+ # pip install google-cloud-aiplatform>=1.70.0",
+                package="google-cloud-aiplatform"
+            ))
+        elif v_ai != "Unknown":
+            try:
+                if version.parse(v_ai) < version.parse("1.70.0"):
+                    issues.append(OptimizationIssue(
+                        "vertex_legacy_opt", "Situational Performance (Legacy SDK)", "MEDIUM", "20% cost savings",
+                        f"Your SDK ({v_ai}) lacks native Context Caching. Optimize by using selective prompt pruning before execution.",
+                        "+ from agent_ops_cockpit.ops.cost_optimizer import situational_pruning\n+ pruned = situational_pruning(context)",
+                        package="google-cloud-aiplatform"
+                    ))
+                    issues.append(OptimizationIssue(
+                        "vertex_upgrade_path", "Modernization Path", "HIGH", "90% cost savings",
+                        "Upgrading to 1.70.0+ enables near-instant token reuse via CachingConfig.",
+                        "+ # Upgrade to >1.70.0",
+                        package="google-cloud-aiplatform"
+                    ))
+                elif "cache" not in content_lower:
+                    issues.append(OptimizationIssue(
+                        "context_caching", "Enable Context Caching", "HIGH", "90% cost reduction",
+                        "Large model context detected. Use native CachingConfig.",
+                        "+ cache = vertexai.preview.CachingConfig(ttl=3600)",
+                        package="google-cloud-aiplatform"
+                    ))
+            except Exception:
+                pass
+
+    # OpenAI
+    openai_v = versions.get("openai", "Not Installed")
+    if "openai" in content_lower:
+        if openai_v != "Not Installed" and version.parse(openai_v) < version.parse("1.0.0"):
+            issues.append(OptimizationIssue(
+                "openai_legacy", "Found Legacy OpenAI SDK", "HIGH", "40% latency reduction",
+                f"You are on {openai_v}. Transitioning to the v1.0.0+ Client pattern enables modern streaming and improved error handling.",
+                "+ from openai import OpenAI\n+ client = OpenAI()",
+                package="openai"
+            ))
+        elif "prompt_cache" not in content_lower:
+            issues.append(OptimizationIssue(
+                "openai_caching", "OpenAI Prompt Caching", "MEDIUM", "50% latency reduction",
+                "OpenAI automatically caches repeated input prefixes. Ensure your system prompt is first.",
+                "+ # Ensure system prompt is first\n+ messages = [{'role': 'system', ...}]",
+                package="openai"
+            ))
+
+    # Anthropic
+    if ("anthropic" in content_lower or "claude" in content_lower) and "orchestra" not in content_lower:
+        issues.append(OptimizationIssue(
+            "anthropic_orchestration", "Anthropic Orchestration Pattern", "HIGH", "30% reliability boost",
+            "Claude performs best with an Orchestrator-Subagent pattern for complex tasks.",
+            "+ # Use orchestrator to delegate sub-tasks",
+            package="anthropic"
+        ))
+
+    # Microsoft
+    if ("autogen" in content_lower or "microsoft" in content_lower) and "workflow" not in content_lower:
+        issues.append(OptimizationIssue(
+            "ms_workflows", "Microsoft Agent Workflows", "MEDIUM", "40% consistency boost",
+            "Using graph-based repeatable workflows ensures enterprise reliability.",
+            "+ # Define a repeatable graph-based flow",
+            package="pyautogen"
+        ))
+
+    # AWS
+    if ("bedrock" in content_lower or "boto3" in content_lower) and "actiongroup" not in content_lower:
+        issues.append(OptimizationIssue(
+            "aws_action_groups", "AWS Bedrock Action Groups", "HIGH", "50% tool reliability",
+            "Standardize tool execution via Bedrock Action Group schemas.",
+            "+ # Define Bedrock Action Group",
+            package="aws-sdk"
+        ))
+
+    # CopilotKit
+    if "copilotkit" in content_lower and "usecopilotstate" not in content_lower:
+        issues.append(OptimizationIssue(
+            "copilot_state", "CopilotKit Shared State", "MEDIUM", "60% UI responsiveness",
+            "Ensure the Face remains aligned with the Engine via shared state sync.",
+            "+ # Use shared state for UI alignment",
+            package="@copilotkit/react-core"
+        ))
 
-    #
+    # Routing
+    if "pro" in content_lower and "flash" not in content_lower:
+        issues.append(OptimizationIssue(
+            "model_routing", "Smart Model Routing", "HIGH", "70% cost savings",
+            "Route simple queries to Flash models to minimize consumption.",
+            "+ if is_simple(q): model = 'gemini-1.5-flash'",
+            package="google-cloud-aiplatform"
+        ))
+
+    # Infrastructure (Cloud Run + GKE)
+    if "cloud run" in content_lower and "cpu_boost" not in content_lower:
+        issues.append(OptimizationIssue(
+            "cr_startup_boost", "Cloud Run Startup Boost", "HIGH", "50% latency reduction",
+            "Enable Startup CPU Boost to reduce cold-start latency for Python agents.",
+            "+ startup_cpu_boost: true",
+            package="google-cloud-run"
+        ))
+    if ("gke" in content_lower or "kubernetes" in content_lower) and "identity" not in content_lower:
+        issues.append(OptimizationIssue(
+            "gke_identity", "GKE Workload Identity", "HIGH", "100% security baseline",
+            "Use Workload Identity for secure service-to-service communication.",
+            "+ # Use Workload Identity",
+            package="google-cloud-gke"
+        ))
 
-    #
-    if "
+    # Language Specific (Go + Node)
+    if file_path.endswith(".go") and "goroutine" not in content_lower:
+        issues.append(OptimizationIssue(
+            "go_concurrency", "Go Native Concurrency", "HIGH", "80% throughput boost",
+            "Leveraging Goroutines for parallel tool execution is a Go best practice.",
+            "+ go func() { tool.execute() }()",
+            package="golang"
+        ))
+    if (file_path.endswith(".ts") or file_path.endswith(".js") or "axios" in content_lower) and "fetch" not in content_lower:
         issues.append(OptimizationIssue(
-            "
-            "
-            "
-            "
-            "OpenAI automatically caches repeated input prefixes. Ensure your system prompt is at the beginning of the message list.",
-            "+ # Ensure system prompt is first and static for optimal caching\n+ messages = [{'role': 'system', 'content': SYSTEM_PROMPT}, ...]",
-            fix_pattern="# [Cockpit Fix] Optimize OpenAI Caching\n"
+            "node_native_fetch", "Native Fetch API", "MEDIUM", "20% bundle reduction",
+            "Node 20+ supports native fetch, reducing dependency on heavy libraries like axios.",
+            "+ const res = await fetch(url);",
+            package="nodejs"
         ))
 
-
+    lg_v = versions.get("langgraph", "Not Installed")
+    if "langgraph" in content_lower:
+        if lg_v != "Not Installed" and lg_v != "Unknown":
+            try:
+                if version.parse(lg_v) < version.parse("0.1.0"):
+                    issues.append(OptimizationIssue(
+                        "langgraph_legacy", "Situational Stability (Legacy LangGraph)", "HIGH", "Stability Boost",
+                        f"You are on {lg_v}. Older versions lack the hardened StateGraph compilation. Upgrade is recommended.",
+                        "+ # Consider upgrading for better persistence",
+                        package="langgraph"
+                    ))
+            except Exception:
+                pass
+
+        if "persistence" not in content_lower and "checkpointer" not in content_lower:
+            issues.append(OptimizationIssue(
+                "langgraph_persistence", "LangGraph Persistence", "HIGH", "100% state recovery",
+                "A checkpointer is mandatory for reliable long-running agents.",
+                "+ graph.compile(checkpointer=checkpointer)",
+                package="langgraph"
+            ))
+        if "recursion" not in content_lower:
+            issues.append(OptimizationIssue(
+                "langgraph_recursion", "Recursion Limits", "MEDIUM", "Safety Guardrail",
+                "Set recursion limits to prevent expensive infinite loops in cyclic graphs.",
+                "+ graph.invoke(..., config={'recursion_limit': 50})",
+                package="langgraph"
+            ))
+
+    # --- ARCHITECTURAL OPTIMIZATIONS ---
+
+    # Large system instructions
     large_string_pattern = re.compile(r'"""[\s\S]{200,}"""|\'\'\'[\s\S]{200,}\'\'\'')
     if large_string_pattern.search(content) and "cache" not in content_lower:
         issues.append(OptimizationIssue(
-            "context_caching",
-            "
-            "
-            "
-            "Large static system instructions detected. Using context caching (Gemini/Anthropic) prevents redundant token processing.",
-            "+ cache = vertexai.preview.CachingConfig(ttl=3600)\n+ model = GenerativeModel('gemini-1.5-pro', caching_config=cache)",
-            fix_pattern="# [Cockpit Fix] Vertex AI Context Caching enabled\n"
+            "context_caching", "Enable Context Caching", "HIGH", "90% cost reduction",
+            "Large static system instructions detected. Use context caching.",
+            "+ cache = vertexai.preview.CachingConfig(ttl=3600)",
+            package="google-cloud-aiplatform"
         ))
 
-    #
+    # Missing semantic cache
     if "hive_mind" not in content_lower and "cache" not in content_lower:
         issues.append(OptimizationIssue(
-            "semantic_caching",
-            "
-            "
-            "
-            "No caching layer detected. Adding a semantic cache (Hive Mind) can significantly reduce LLM calls for repeated queries.",
-            "+ @hive_mind(cache=global_cache)\n async def chat(q: str): ...",
-            fix_pattern="# [Cockpit Fix] Hive Mind Semantic Caching integrated\n"
+            "semantic_caching", "Implement Semantic Caching", "HIGH", "40-60% savings",
+            "No caching layer detected. Adding a semantic cache reduces LLM costs.",
+            "+ @hive_mind(cache=global_cache)",
+            package="google-adk"
        ))
 
+    # --- BEST PRACTICE OPTIMIZATIONS ---
+
+    # Prompt Externalization
+    if large_string_pattern.search(content):
+        issues.append(OptimizationIssue(
+            "external_prompts", "Externalize System Prompts", "MEDIUM", "Architectural Debt Reduction",
+            "Keeping large system prompts in code makes them hard to version and test. Move them to 'system_prompt.md' and load dynamically.",
+            "+ with open('system_prompt.md', 'r') as f:\n+ SYSTEM_PROMPT = f.read()"
+        ))
+
+    # Resiliency / Retries
+    if "retry" not in content_lower and "tenacity" not in content_lower:
+        issues.append(OptimizationIssue(
+            "resiliency_retries", "Implement Exponential Backoff", "HIGH", "99.9% Reliability",
+            "Your agent calls external APIs/DBs but has no retry logic. Use 'tenacity' to handle transient failures.",
+            "+ @retry(wait=wait_exponential(multiplier=1, min=4, max=10), stop=stop_after_attempt(3))",
+            package="tenacity"
+        ))
+
+    # Session Management
+    if "session" not in content_lower and "conversation_id" not in content_lower:
+        issues.append(OptimizationIssue(
+            "session_management", "Add Session Tracking", "MEDIUM", "User Continuity",
+            "No session tracking detected. Agents in production need a 'conversation_id' to maintain multi-turn context.",
+            "+ def chat(q: str, conversation_id: str = None):"
+        ))
+
+    # Pinecone Optimization
+    if "pinecone" in content_lower:
+        if "grpc" not in content_lower:
+            issues.append(OptimizationIssue(
+                "pinecone_grpc", "Pinecone High-Perf (gRPC)", "MEDIUM", "40% latency reduction",
+                "You are using the standard Pinecone client. Switching to pinecone[grpc] enables low-latency streaming for large vector retrievals.",
+                "+ from pinecone.grpc import PineconeGRPC as Pinecone\n+ pc = Pinecone(api_key='...')"
+            ))
+        if "namespace" not in content_lower:
+            issues.append(OptimizationIssue(
+                "pinecone_isolation", "Pinecone Namespace Isolation", "MEDIUM", "RAG Accuracy Boost",
+                "No namespaces detected. Use namespaces to isolate user data or document segments for more accurate retrieval.",
+                "+ index.query(..., namespace='customer-a')"
+            ))
+
+    # Google Cloud Database Optimizations
+
+    # AlloyDB
+    if "alloydb" in content_lower:
+        if "columnar" not in content_lower:
+            issues.append(OptimizationIssue(
+                "alloydb_columnar", "AlloyDB Columnar Engine", "HIGH", "100x Query Speedup",
+                "AlloyDB detected. Enable the Columnar Engine for analytical and AI-driven vector queries.",
+                "+ # Enable AlloyDB Columnar Engine for vector scaling"
+            ))
+
+    # BigQuery
+    if "bigquery" in content_lower or "bq" in content_lower:
+        if "vector_search" not in content_lower:
+            issues.append(OptimizationIssue(
+                "bq_vector_search", "BigQuery Vector Search", "HIGH", "FinOps: Serverless RAG",
+                "BigQuery detected. Use BQ Vector Search for cost-effective RAG over massive datasets without moving data to a separate DB.",
+                "+ SELECT * FROM VECTOR_SEARCH(TABLE my_dataset.embeddings, ...)"
+            ))
+
+    # Cloud SQL
+    if "cloudsql" in content_lower or "psycopg2" in content_lower or "sqlalchemy" in content_lower:
+        if "cloud-sql-connector" not in content_lower:
+            issues.append(OptimizationIssue(
+                "cloudsql_connector", "Cloud SQL Python Connector", "MEDIUM", "100% Secure Auth",
+                "Using raw drivers detected. Use the official Cloud SQL Python Connector for IAM-based authentication and automatic encryption.",
+                "+ from google.cloud.sql.connector import Connector\n+ connector = Connector()"
+            ))
+
+    # Firestore
+    if "firestore" in content_lower:
+        if "vector" not in content_lower:
+            issues.append(OptimizationIssue(
+                "firestore_vector", "Firestore Vector Search (Native)", "HIGH", "Real-time RAG",
+                "Firestore detected. Use native Vector Search and KNN queries for high-concurrency mobile/web agent retrieval.",
+                "+ collection.find_nearest(vector_field='embedding', ...)"
+            ))
+
+    # Oracle OCI Optimizations
+    if "oci" in content_lower or "oracle" in content_lower:
+        if "resource_principal" not in content_lower:
+            issues.append(OptimizationIssue(
+                "oci_auth", "OCI Resource Principals", "HIGH", "100% Secure Auth",
+                "Using static config/keys detected on OCI. Use Resource Principals for secure, credential-less access from OCI compute.",
+                "+ auth = oci.auth.signers.get_resource_principals_signer()"
+            ))
+
+    # CrewAI Optimizations
+    if "crewai" in content_lower or "crew(" in content_lower:
+        if "manager_agent" not in content_lower and "hierarchical" not in content_lower:
+            issues.append(OptimizationIssue(
+                "crewai_manager", "Use Hierarchical Manager", "MEDIUM", "30% Coordination Boost",
+                "Your crew uses sequential execution. For complex tasks, a Manager Agent improves task handoffs and reasoning.",
+                "+ crew = Crew(..., process=Process.hierarchical, manager_agent=manager)"
+            ))
+
     return issues
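Since analyze_code now accepts an optional versions map, the new heuristics can be exercised directly in a script or test. A minimal driver sketch, assuming the function is importable from agent_ops_cockpit.optimizer; the pinned versions are illustrative, not real audit output:

# Hedged sketch: driving the versions-aware analyzer shown above.
from agent_ops_cockpit.optimizer import analyze_code

source = open("agent.py").read()
issues = analyze_code(source, "agent.py", versions={"openai": "0.28.1", "langgraph": "0.0.40"})
for issue in issues:
    print(f"[{issue.impact}] {issue.id}: {issue.savings}")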
 
 def estimate_savings(token_count: int, issues: List[OptimizationIssue]) -> Dict[str, Any]:
-    """
-    Step 5: FinOps Integration. Calculate literal dollar-amount projection.
-    """
-    # Baseline: $10 per 1M tokens (mixed input/output)
     baseline_cost_per_m = 10.0
     monthly_requests = 10000
     current_cost = (token_count / 1_000_000) * baseline_cost_per_m * monthly_requests
 
     total_savings_pct = 0.0
     for issue in issues:
-        if "90%" in issue.savings:
-
-
+        if "90%" in issue.savings:
+            total_savings_pct += 0.45 # Context Caching / Modern SDK
+        elif "70%" in issue.savings:
+            total_savings_pct += 0.35 # Smart Routing (Pro -> Flash)
+        elif "50%" in issue.savings:
+            total_savings_pct += 0.20 # Infrastructure / Startup Boost
+        elif "40-60%" in issue.savings:
+            total_savings_pct += 0.25 # Semantic Caching (Hive Mind)
+        else:
+            total_savings_pct += 0.05 # Standard Best Practices
 
-    projected_savings = current_cost * min(total_savings_pct, 0.
+    projected_savings = current_cost * min(total_savings_pct, 0.85)
 
     return {
         "current_monthly": current_cost,
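The projection arithmetic above is easy to sanity-check by hand. For an assumed 2,000-token prompt at the $10-per-1M baseline and 10k requests/month, two findings in the "90%" and "70%" buckets project $160 of $200 saved:

# Worked example of estimate_savings' math (inputs assumed for illustration):
token_count = 2_000
current = (token_count / 1_000_000) * 10.0 * 10_000  # = $200.00/month
pct = min(0.45 + 0.35, 0.85)                         # "90%" + "70%" buckets, capped at 85%
print(f"${current:.2f} current, ${current * pct:.2f} projected savings")  # $200.00, $160.00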
@@ -94,12 +352,12 @@ def estimate_savings(token_count: int, issues: List[OptimizationIssue]) -> Dict[
 def audit(
     file_path: str = typer.Argument("agent.py", help="Path to the agent code to audit"),
     interactive: bool = typer.Option(True, "--interactive/--no-interactive", "-i", help="Run in interactive mode"),
-    apply_fix: bool = typer.Option(False, "--apply", "--fix", help="Automatically apply recommended fixes")
+    apply_fix: bool = typer.Option(False, "--apply", "--fix", help="Automatically apply recommended fixes"),
+    quick: bool = typer.Option(False, "--quick", "-q", help="Skip live evidence fetching for faster execution")
 ):
-    """
-    Audits agent code and proposes cost/perf/FinOps optimizations.
-    """
     console.print(Panel.fit("🔍 [bold blue]GCP AGENT OPS: OPTIMIZER AUDIT[/bold blue]", border_style="blue"))
+    if quick:
+        console.print("[dim]⚡ Running in Quick Mode (skipping live evidence fetches)[/dim]")
     console.print(f"Target: [yellow]{file_path}[/yellow]")
 
     if not os.path.exists(file_path):
@@ -109,19 +367,41 @@ def audit(
     with open(file_path, 'r') as f:
         content = f.read()
 
+    # Heuristic: Find all imported packages
+    imports = re.findall(r"(?:from|import)\s+([\w\.-]+)", content)
+
+    from agent_ops_cockpit.ops.evidence_bridge import get_installed_version
+    package_versions = { pkg: get_installed_version(pkg) for pkg in ["google-cloud-aiplatform", "openai", "anthropic", "langgraph", "crewai"] }
+
     token_estimate = len(content.split()) * 1.5
     console.print(f"📊 Token Metrics: ~[bold]{token_estimate:.0f}[/bold] prompt tokens detected.")
 
-
-
-
-
+    issues = analyze_code(content, file_path, versions=package_versions)
+    # Inject live evidence (skip in quick mode)
+    if not quick:
+        for issue in issues:
+            if issue.package:
+                issue.evidence = get_package_evidence(issue.package)
+
+    # --- CROSS-PACKAGE VALIDATION ---
+    comp_reports = get_compatibility_report(imports)
+
+    if comp_reports:
+        console.print("\n[bold yellow]🧩 Cross-Package Validation:[/bold yellow]")
+        for report in comp_reports:
+            if report["type"] == "INCOMPATIBLE":
+                console.print(f"❌ [bold red]Conflict Detected:[/bold red] {report['component']} + {report['conflict_with']}")
+                console.print(f"   [dim]{report['reason']}[/dim]")
+            elif report["type"] == "SYNERGY":
+                console.print(f"✅ [bold green]Synergy Verified:[/bold green] {report['component']} is optimally paired.")
 
     if not issues:
-        console.print("\n[bold green]✅ No immediate
-
+        console.print("\n[bold green]✅ No immediate code-level optimizations found. Your agent is lean![/bold green]")
+        if not comp_reports:
+            return
+        else:
+            raise typer.Exit(0)
 
-    # Step 5: FinOps Report
     savings = estimate_savings(token_estimate, issues)
     finops_panel = Panel(
         f"💰 [bold]FinOps Projection (Est. 10k req/mo)[/bold]\n"
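The consuming loop above pins down the shape of each compatibility report entry. A sketch with invented values; only the field names are taken from the code:

# Assumed shape of a get_compatibility_report() entry, inferred from the loop above:
report = {
    "type": "INCOMPATIBLE",            # or "SYNERGY"
    "component": "example-package",    # invented value
    "conflict_with": "other-package",  # only read for INCOMPATIBLE entries
    "reason": "why the pairing fails",
}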
@@ -141,15 +421,39 @@ def audit(
         console.print(f"\n[bold white on blue] --- [{opt.impact} IMPACT] {opt.title} --- [/bold white on blue]")
         console.print(f"Benefit: [green]{opt.savings}[/green]")
         console.print(f"Reason: {opt.description}")
-
+
+        if opt.evidence and "error" not in opt.evidence:
+            ev = opt.evidence
+            ev_title = "[dim]SDK Citation & Evidence[/dim]"
+
+            # Highlight if an upgrade is required for maximum efficiency
+            if ev.get("upgrade_required"):
+                console.print("🚨 [bold yellow]URGENT UPGRADE RECOMMENDED[/bold yellow]")
+                console.print(f"   Current: {ev['installed_version']} | Required for optimization: >={ev['min_optimized_version']}")
+                ev_title = "[bold red]UPGRADE REQUIRED Evidence[/bold red]"
+
+            ev_panel = Panel(
+                f"🔗 [bold]Source[/bold]: {ev['source_url']}\n"
+                f"📅 [bold]Latest Release[/bold]: {ev['release_date'][:10]}\n"
+                f"📝 [bold]Note[/bold]: {ev['best_practice_context']}",
+                title=ev_title,
+                border_style="red" if ev.get("upgrade_required") else "dim"
+            )
+            console.print(ev_panel)
+            # Orchestrator parsing
+            console.print(f"SOURCE: {opt.title} | {ev['source_url']} | {ev['best_practice_context'].replace('\\n', ' ')}")
+
         syntax = Syntax(opt.diff, "python", theme="monokai", line_numbers=False)
         console.print(syntax)
 
+        # Output ACTION: for report generation
+        console.print(f"ACTION: {file_path}:1 | Optimization: {opt.title} | {opt.description} (Est. {opt.savings})")
+
         do_apply = False
         if apply_fix:
             do_apply = True
         elif interactive:
-            do_apply = typer.confirm("\nDo you want to apply this optimization?", default=True)
+            do_apply = typer.confirm("\nDo you want to apply this code-level optimization?", default=True)
 
         if do_apply:
             console.print("✅ [APPROVED] applying fix...")
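Likewise, the evidence panel only reads a handful of fields, which implies a get_package_evidence record roughly like the following; the field names come from the renderer above, the values are invented:

# Assumed shape of an evidence record, inferred from the consuming code:
evidence = {
    "installed_version": "1.38.0",          # invented
    "min_optimized_version": "1.70.0",      # invented
    "upgrade_required": True,
    "source_url": "https://pypi.org/project/google-cloud-aiplatform/",
    "release_date": "2025-01-01T00:00:00",  # sliced to [:10] for display
    "best_practice_context": "Context caching needs 1.70.0+.",
}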
@@ -164,7 +468,6 @@ def audit(
         with open(file_path, 'w') as f:
             f.write(fixed_content)
         console.print(f"\n✨ [bold green]Applied {applied} optimizations to {file_path}![/bold green]")
-        console.print("🚀 Run 'agent-ops report' to verify the new architecture score.")
 
     summary_table = Table(title="🎯 AUDIT SUMMARY")
     summary_table.add_column("Category", style="cyan")
@@ -173,5 +476,10 @@ def audit(
     summary_table.add_row("Optimizations Rejected", str(rejected))
     console.print(summary_table)
 
+    # CI/CD Enforcement: Fail if high-impact issues remain in non-interactive mode
+    if not interactive and any(opt.impact == "HIGH" for opt in issues):
+        console.print("\n[bold red]❌ HIGH IMPACT issues detected. Optimization required for production.[/bold red]")
+        raise typer.Exit(code=1)
+
 if __name__ == "__main__":
     app()
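The new exit-code contract (Exit(code=1) when HIGH-impact findings survive a non-interactive run) makes the audit usable as a CI gate. A sketch using Typer's test runner, assuming audit is the app's only registered command; the target file and flags mirror the diff:

# Hypothetical CI gate built on the non-interactive exit code added above:
from typer.testing import CliRunner
from agent_ops_cockpit.optimizer import app

runner = CliRunner()
result = runner.invoke(app, ["agent.py", "--no-interactive", "--quick"])
assert result.exit_code == 0, "HIGH impact optimizations outstanding"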
agent_ops_cockpit/shadow/router.py
CHANGED
@@ -1,7 +1,8 @@
 import asyncio
 import json
 import uuid
-
+import os
+from typing import Any, Callable
 from datetime import datetime
 
 class ShadowRouter:
@@ -13,17 +14,17 @@ class ShadowRouter:
         self.v1 = v1_func
         self.v2 = v2_func
 
-    async def route(self, query: str,
+    async def route(self, query: str, **kwargs):
         trace_id = str(uuid.uuid4())
 
         # 1. Primary Call (Production v1) - Sequential/Blocking
         start_v1 = datetime.now()
-        v1_resp = await self.v1(query,
+        v1_resp = await self.v1(query, **kwargs)
         v1_latency = (datetime.now() - start_v1).total_seconds()
 
         # 2. Shadow Call (Experimental v2) - Asynchronous/Non-blocking
         # We fire and forget this, or use a background task
-        asyncio.create_task(self._run_shadow(trace_id, query,
+        asyncio.create_task(self._run_shadow(trace_id, query, v1_resp, v1_latency, **kwargs))
 
         return {
             "response": v1_resp,
@@ -31,14 +32,14 @@ class ShadowRouter:
             "latency": v1_latency
         }
 
-    async def _run_shadow(self, trace_id: str, query: str,
+    async def _run_shadow(self, trace_id: str, query: str, v1_resp: Any, v1_latency: float, **kwargs):
         """
         Runs the v2 agent in the 'shadow' without user impact.
         Logs the comparison to BigQuery/Cloud Logging.
         """
         try:
             start_v2 = datetime.now()
-            v2_resp = await self.v2(query,
+            v2_resp = await self.v2(query, **kwargs)
             v2_latency = (datetime.now() - start_v2).total_seconds()
 
             comparison = {
@@ -71,5 +72,3 @@ class ShadowRouter:
         os.makedirs("traces", exist_ok=True)
         with open(f"traces/{data['traceId']}.json", "w") as f:
             json.dump(data, f)
-
-import os
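route now forwards **kwargs to both variants and returns v1's answer immediately while v2 runs fire-and-forget. A minimal wiring sketch, assuming the constructor takes (v1_func, v2_func) as the assignments above suggest; the stand-in agents and the short sleep that lets the shadow task finish are illustrative:

# Hypothetical stand-in agents demonstrating the shadow pattern above:
import asyncio

async def v1_agent(query: str, **kwargs) -> str:
    return f"v1 answer to: {query}"

async def v2_agent(query: str, **kwargs) -> str:
    return f"v2 answer to: {query}"

async def main():
    router = ShadowRouter(v1_agent, v2_agent)
    result = await router.route("How do I deploy to Cloud Run?", user_id="demo")
    print(result["response"], result["latency"])
    await asyncio.sleep(0.1)  # let the fire-and-forget shadow task write its trace

asyncio.run(main())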
agent_ops_cockpit/system_prompt.md
ADDED
@@ -0,0 +1,13 @@
+# 🕹️ AgentOps Cockpit: System Persona
+
+You are a professional **Google Well-Architected Agent Orchestrator**.
+Your primary goal is to assist users in building, optimizing, and securing AI agents on Google Cloud.
+
+## 🛡️ Core Directives:
+1. **Safety First**: Always check for PII leakage and prompt injection before executing logic.
+2. **Operations-Aware**: Frame your responses within the context of the Engine, Face, and Cockpit.
+3. **Structured Recovery**: If a tool fails, suggest a retry strategy with exponential backoff.
+4. **Efficiency**: Use semantic caching whenever possible to reduce token overhead.
+
+## 📡 Output Standard:
+Follow the **A2UI Protocol**. Always return structured JSON that the Face can render.
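This externalized persona is what the optimizer's "external_prompts" finding points at; loading it at startup mirrors the suggested fix from the diff's own snippet:

# Loading the externalized system prompt (path assumed relative to the package root):
with open("system_prompt.md", "r") as f:
    SYSTEM_PROMPT = f.read()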
agent_ops_cockpit/tests/golden_set.json
ADDED
@@ -0,0 +1,52 @@
+[
+  {"query": "How do I deploy to Google Cloud Run?", "expected": "deploy"},
+  {"query": "What is the A2UI protocol?", "expected": "a2ui"},
+  {"query": "How do I check Hive Mind status?", "expected": "hive mind"},
+  {"query": "Run a security audit on my agent", "expected": "audit"},
+  {"query": "What is the cost of 1M tokens?", "expected": "cost"},
+  {"query": "How to enable context caching?", "expected": "caching"},
+  {"query": "Scan my code for secrets", "expected": "secret"},
+  {"query": "Is my agent well-architected?", "expected": "architecture"},
+  {"query": "Explain shadow routing", "expected": "shadow"},
+  {"query": "Deploy to GKE Autopilot", "expected": "gke"},
+  {"query": "What is a PII scrubber?", "expected": "pii"},
+  {"query": "How to fix prompt injection?", "expected": "injection"},
+  {"query": "Run the red team evaluation", "expected": "red team"},
+  {"query": "Optimize my LLM spend", "expected": "optimize"},
+  {"query": "What are StatBars in A2UI?", "expected": "statbar"},
+  {"query": "How to use the MCP server?", "expected": "mcp"},
+  {"query": "Explain Quality Hill Climbing", "expected": "quality"},
+  {"query": "Check system health", "expected": "health"},
+  {"query": "How to redact credit card numbers?", "expected": "redact"},
+  {"query": "What is the Agentic Trinity?", "expected": "trinity"},
+  {"query": "Setting up Firebase Hosting", "expected": "firebase"},
+  {"query": "How to use the ADK?", "expected": "adk"},
+  {"query": "Detecting hardcoded API keys", "expected": "key"},
+  {"query": "Show me the performance metrics", "expected": "metrics"},
+  {"query": "How to configure VPC Service Controls?", "expected": "vpc"},
+  {"query": "What is the Conflict Guard?", "expected": "conflict"},
+  {"query": "Explain Model Armor integration", "expected": "model armor"},
+  {"query": "How to limit prompt length?", "expected": "limit"},
+  {"query": "Setting up a custom domain", "expected": "domain"},
+  {"query": "How to use structured outputs?", "expected": "structured"},
+  {"query": "What is the cockpit final report?", "expected": "report"},
+  {"query": "How to run a load test?", "expected": "load test"},
+  {"query": "Explain p90 latency", "expected": "p90"},
+  {"query": "How to use the face auditor?", "expected": "ui"},
+  {"query": "Setting up multi-agent swarms", "expected": "multi-agent"},
+  {"query": "What is the situational auditor?", "expected": "situational"},
+  {"query": "How to enable dynamic routing?", "expected": "routing"},
+  {"query": "Explain the regression golden set", "expected": "regression"},
+  {"query": "How to use the Google SDK?", "expected": "sdk"},
+  {"query": "What is the mission control dashboard?", "expected": "dashboard"},
+  {"query": "How to handle token overflow?", "expected": "token"},
+  {"query": "Explain the adversarial attack suite", "expected": "adversarial"},
+  {"query": "How to use workload identity?", "expected": "identity"},
+  {"query": "What is the response match metric?", "expected": "match"},
+  {"query": "How to conduct a design review?", "expected": "review"},
+  {"query": "Explain the FinOps pillar", "expected": "finops"},
+  {"query": "How to use Gemini 1.5 Flash?", "expected": "flash"},
+  {"query": "What is the difference between quick and deep audit?", "expected": "audit"},
+  {"query": "How to setup a checkpointer in LangGraph?", "expected": "checkpointer"},
+  {"query": "Explain the cockpit orchestrator", "expected": "orchestrator"}
+]
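Each golden-set entry pairs a query with a keyword the answer is expected to contain, which supports a simple substring "response match" check. A sketch of such a regression loop; the agent call is a stand-in, not the package's real entry point:

# Hypothetical keyword-match regression over the golden set above:
import json

def ask_agent(query: str) -> str:
    return f"stub answer about {query.lower()}"  # replace with a real agent call

with open("agent_ops_cockpit/tests/golden_set.json") as f:
    golden = json.load(f)

hits = sum(1 for case in golden if case["expected"] in ask_agent(case["query"]).lower())
print(f"Response match: {hits}/{len(golden)}")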