agentops-cockpit 0.2.2__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,25 @@
1
+ from pydantic import BaseModel, Field
2
+ from typing import List, Optional, Dict, Any
3
+ from datetime import datetime
4
+
5
class EvidenceNode(BaseModel):
    """One source consulted by the agent, together with its relevance score."""

    # Identifier string for the source.
    source_id: str
    # Category label for the source, e.g. "doc", "web", "tool_query".
    source_type: str
    # Text excerpt associated with the source.
    snippet: str
    # Required field; validation restricts it to the closed range [0.0, 1.0].
    relevance_score: float = Field(..., ge=0.0, le=1.0)
11
+
12
def _utc_now_iso() -> str:
    """Return the current UTC time as a timezone-aware ISO-8601 string."""
    # Imported locally because the module header only imports ``datetime``.
    from datetime import timezone

    # ``datetime.utcnow()`` is deprecated (Python 3.12+) and returns a naive
    # value; an aware timestamp carries an explicit "+00:00" offset instead.
    return datetime.now(timezone.utc).isoformat()


class AgentEvidencePacket(BaseModel):
    """
    Standard 'Evidence Packet' format.

    Ensures every agent response has a clear, auditable trail of information.
    All fields are optional at construction time and fall back to the
    defaults noted below.
    """

    # ISO-8601 creation time in UTC; generated when not supplied.
    timestamp: str = Field(default_factory=_utc_now_iso)
    # Reasoning steps as strings; defaults to an empty list.
    reasoning_path: List[str] = Field(default_factory=list)
    # Evidence nodes backing the response; defaults to an empty list.
    sources: List[EvidenceNode] = Field(default_factory=list)
    # Free-form tool call records; defaults to an empty list.
    tool_calls: List[Dict[str, Any]] = Field(default_factory=list)
    # Optional mapping of usage label -> token count.
    token_usage: Optional[Dict[str, int]] = None
22
+
23
def pack_evidence(response_data: Dict[str, Any]) -> AgentEvidencePacket:
    """Build an ``AgentEvidencePacket`` from a dict of agent debug data.

    The dict's keys are passed through as keyword arguments to the model
    constructor, so validation is whatever the model itself enforces.
    """
    packet = AgentEvidencePacket(**response_data)
    return packet
@@ -0,0 +1,407 @@
1
+ import os
2
+ import re
3
+ from typing import Dict, List, Any
4
+
5
+ # --- CHECKLISTS ---
6
+
7
# Review checklist for the Google stack.
# Shape: a list of sections, each a dict with a display "category" and a list
# of "checks"; every check is a (question, rationale) tuple of strings.
GOOGLE_CHECKLIST = [
    {
        "category": "🏗️ Core Architecture (Google)",
        "checks": [
            ("Runtime: Is the agent running on Cloud Run or GKE?", "Critical for scalability and cost."),
            ("Framework: Is ADK used for tool orchestration?", "Google-standard for agent-tool communication."),
            ("Sandbox: Is Code Execution running in Vertex AI Sandbox?", "Prevents malicious code execution."),
            ("Backend: Is FastAPI used for the Engine layer?", "Industry-standard for high-concurrency agent apps.")
        ]
    },
    {
        "category": "🛡️ Security & Privacy",
        "checks": [
            ("PII: Is a scrubber active before sending data to LLM?", "Compliance requirement (GDPR/SOC2)."),
            ("Identity: Is IAM used for tool access?", "Ensures least-privilege security."),
            ("Safety: Are Vertex AI Safety Filters configured?", "Protects against toxic generation.")
        ]
    },
    {
        "category": "📉 Optimization",
        "checks": [
            ("Caching: Is Semantic Caching (Hive Mind) enabled?", "Reduces LLM costs."),
            ("Context: Are you using Context Caching?", "Critical for prompts > 32k tokens."),
            ("Routing: Are you using Flash for simple tasks?", "Performance and cost optimization.")
        ]
    },
    {
        "category": "🌐 Infrastructure & Runtime",
        "checks": [
            ("Agent Engine: Are you using Vertex AI Reasoning Engine for deployment?", "Managed orchestration with built-in versioning and traces."),
            ("Cloud Run: Is 'Startup CPU Boost' enabled?", "Critical for reducing cold-start latency in Python agents."),
            ("GKE: Is Workload Identity used for IAM?", "Google-standard for secure service-to-service communication."),
            ("VPC: Is VPC Service Controls (VPC SC) active?", "Prevents data exfiltration by isolating the agent environment.")
        ]
    },
    {
        "category": "🎭 Face (UI/UX)",
        "checks": [
            ("A2UI: Are components registered in the A2UIRenderer?", "Ensures engine-driven UI protocol compliance."),
            ("Responsive: Are mobile-first media queries present in index.css?", "Ensures usability across devices (iOS/Android)."),
            ("Accessibility: Do interactive elements have aria-labels?", "Critical for inclusive design and automated testing."),
            ("Triggers: Are you using interactive triggers for state changes?", "Improves 'Agentic Feel' through reactive UI.")
        ]
    }
]
52
+
53
# Review checklist for the OpenAI stack.
# Shape: list of {"category": str, "checks": [(question, rationale), ...]}.
OPENAI_CHECKLIST = [
    {
        "category": "🏗️ Core Architecture (OpenAI)",
        "checks": [
            ("APIs: Using Assistants API or Tool Calling?", "Enables structured interactions and memory."),
            ("Models: Using Mini models for simple tasks?", "Cost-efficient routing (GPT-4o-mini)."),
            ("Memory: Is thread-based persistence implemented?", "Ensures session continuity."),
            ("Tooling: Are Function Definitions schema-validated?", "Prevents runtime tool execution errors."),
            ("Routing: Are deterministic routers used for critical branches?", "Prevents LLM drift in sensitive workflows."),
            ("Outputs: Is 'Structured Outputs' enabled for tool calls?", "Ensures data integrity and prevents injection.")
        ]
    },
    {
        "category": "🛡️ Security & Safety",
        "checks": [
            ("Moderation: Is the OpenAI Moderation API active?", "Prevents policy violations in user inputs/outputs."),
            ("Secrets: Are API Keys managed via Env/Secret Manager?", "Prevents credential leakage."),
            ("PII: Are PII Guardrails configured to block sensitive leaks?", "Required for production data handling."),
            ("HITL: Is there a User Approval node for sensitive actions?", "Human-in-the-loop for non-idempotent changes.")
        ]
    },
    {
        "category": "📉 Optimization",
        "checks": [
            ("Caching: Are you leveraging OpenAI's automatic prompt caching?", "Automatic for repeated prefixes."),
            ("Token Management: Is max_completion_tokens set?", "Prevents runaway generation costs."),
            ("Streaming: Is streaming enabled for UI responsiveness?", "Critical for premium user experience.")
        ]
    }
]
83
+
84
# Review checklist for the Anthropic stack.
# Shape: list of {"category": str, "checks": [(question, rationale), ...]}.
ANTHROPIC_CHECKLIST = [
    {
        "category": "🏗️ Core Architecture (Anthropic)",
        "checks": [
            ("Orchestration: Is an Orchestrator-Subagent pattern used?", "ANTHROPIC best practice for complex tasks."),
            ("Loop: Is a structured Context-Action-Verify loop implemented?", "Ensures deterministic agent behavior."),
            ("ACIs: Is the Agent-Computer Interface (ACI) well-documented?", "Detailed tool descriptions are critical for Claude.")
        ]
    },
    {
        "category": "🛡️ Security & Guardrails",
        "checks": [
            ("Sandbox: Are tool calls running in a sandboxed bash environment?", "Isolates host filesystem and network."),
            ("IAM: Is 'Least Privilege' IAM enforced for all tools?", "Treat tool access like production IAM permissions."),
            ("Confirmation: Are sensitive commands (git/rm) blocked or confirmed?", "Prevents accidental or malicious damage."),
            ("Swiss Cheese: Are multiple layers of guardrails (filters + logic) active?", "Anthropic's 'Swiss Cheese Defense' model.")
        ]
    },
    {
        "category": "📉 Reliability",
        "checks": [
            ("Circuit Breakers: Are rate limits and circuit breakers active?", "Prevents infinite loops and API exhaustion."),
            ("Human-in-the-Loop: Are critical file/env changes manual?", "Ensures safety in autonomous workflows."),
            ("Logging: Is every tool invocation logged with full context?", "Auditability for autonomous agent decisions.")
        ]
    }
]
111
+
112
# Review checklist for the Microsoft stack.
# Shape: list of {"category": str, "checks": [(question, rationale), ...]}.
MICROSOFT_CHECKLIST = [
    {
        "category": "🏗️ Core Architecture (Microsoft)",
        "checks": [
            ("Framework: Using Unified Microsoft Agent Framework?", "Merges AutoGen orchestration with Semantic Kernel stability."),
            ("Workflows: Are repeatable, graph-based processes defined?", "Semantic Kernel best practice for enterprise reliability."),
            ("Orchestration: Is a centralized orchestrator managing multi-agent handoffs?", "Critical for complex problem solving in AutoGen."),
            ("Maturity: Are features GA or Preview?", "Graduation process ensures production stability.")
        ]
    },
    {
        "category": "🛡️ Security & Governance",
        "checks": [
            ("Guardrails: Are real-time Semantic Guardrails active?", "Monitors prompts/responses for risky behavior."),
            ("Secrets: Is Azure KeyVault used for key management?", "Production-standard for credential security."),
            ("HITL: Are custom 'Guardrails Agents' active?", "AutoGen pattern for enforcing RAI policies."),
            ("Sandbox: Is code execution isolated in Docker?", "Prevents malicious instruction execution.")
        ]
    },
    {
        "category": "📉 Reliability",
        "checks": [
            ("Observability: Is message tracing enabled via Azure AI?", "Critical for debugging multi-agent message flows."),
            ("Testing: Are TypeChat or similar used for output validation?", "Ensures structured reliability in C#/Python.")
        ]
    }
]
139
+
140
# Review checklist for the AWS stack.
# Shape: list of {"category": str, "checks": [(question, rationale), ...]}.
AWS_CHECKLIST = [
    {
        "category": "🏗️ Core Architecture (AWS)",
        "checks": [
            ("Action Groups: Are Bedrock Action Groups used for tools?", "Standardizes tool execution via OpenAPI schemas."),
            ("Grounding: Is Contextual Grounding enabled in Knowledge Bases?", "Mitigates hallucinations by anchoring to facts."),
            ("Isolation: Is model customization running in a VPC?", "Ensures network security for specialized training.")
        ]
    },
    {
        "category": "🛡️ Security & Guardrails",
        "checks": [
            ("Guardrails: Are organization-level Bedrock Guardrails active?", "Enforces consistent RAI policies across apps."),
            ("Information: Are PII Redaction and Denied Topics configured?", "Protects sensitive data and prevents brand risk."),
            ("IAM: Are service roles scoped to least-privilege?", "Ensures agents cannot cross-service impersonate."),
            ("KMS: Is encryption enabled via Customer Managed Keys (CMK)?", "Production requirement for rest and transit.")
        ]
    },
    {
        "category": "📉 Operations",
        "checks": [
            ("Logging: Is Model Invocation Logging enabled?", "Mandatory for audit and compliance (Audit Manager)."),
            ("Tracing: Are Agent Traces used to monitor orchestration?", "Provides visibility into RAG and reasoning logic."),
            ("IaC: Is the agent deployed via CloudFormation or CDK?", "Ensures repeatable and stable deployments.")
        ]
    }
]
167
+
168
# Review checklist for the CopilotKit stack.
# Shape: list of {"category": str, "checks": [(question, rationale), ...]}.
COPILOTKIT_CHECKLIST = [
    {
        "category": "🏗️ Core Architecture (CopilotKit)",
        "checks": [
            ("Platform: Is CopilotKit Cloud or Self-Hosted used?", "Determines control over infrastructure and state."),
            ("State: Is shared state used for UI-Agent sync?", "Ensures the 'Face' remains aligned with the 'Engine'."),
            ("Reconnection: Is reliable thread persistence enabled?", "Critical for long-running user sessions.")
        ]
    },
    {
        "category": "🛡️ Security & Guardrails",
        "checks": [
            ("Moderation: Is 'guardrails_c' configured in the Cloud?", "Uses OpenAI content moderation as a baseline."),
            ("Auth: Is MFA and Conditional Access enforced via Entra/IAM?", "Ensures only trusted users can trigger agent actions."),
            ("Labels: Are Microsoft Purview sensitivity labels applied?", "Controls what documents the Copilot can access."),
            ("HITL: Are 'Human-in-the-Loop' checkpoints defined?", "Empowers users to guide agents at critical junctures.")
        ]
    },
    {
        "category": "📉 Deployment",
        "checks": [
            ("Staging: Is a staged rollout (pilot program) active?", "Best practice for minimizing AI-driven risks."),
            ("Monitoring: Is activity logging integrated with SIEM?", "Provides anomalous activity detection (Sentinel).")
        ]
    }
]
194
+
195
# Review checklist for LangChain / LangGraph projects.
# Shape: list of {"category": str, "checks": [(question, rationale), ...]}.
LANGCHAIN_CHECKLIST = [
    {
        "category": "🏗️ LangChain / LangGraph Architecture",
        "checks": [
            ("State: Is a typed State Schema used for the graph?", "Ensures data integrity across complex agentic nodes."),
            ("Persistence: Is a Checkpointer (Sqlite/Postgres) active?", "Mandatory for long-running agents and cross-session resume."),
            ("Observability: Is LangSmith integrated for trace analysis?", "De-facto standard for debugging cyclic graph execution."),
            ("Tooling: Are custom tools wrapped in @tool decorators?", "Ensures schema extraction and LLM compatibility (OpenAI/Anthropic).")
        ]
    },
    {
        "category": "🛡️ Security & Guardrails",
        "checks": [
            ("Loop: Is a 'Max Iterations' limit set on the Graph?", "Prevents infinite loops and runaway API costs."),
            ("Secrets: Are API keys loaded via ChatOpenAI(api_key=...)?", "Ensures keys are injectable and not hardcoded."),
            ("Moderation: Is a moderation node active in the graph?", "Pattern for real-time safety filtering of agent thoughts.")
        ]
    }
]
214
+
215
# Framework-agnostic review checklist.
# Shape: list of {"category": str, "checks": [(question, rationale), ...]}.
GENERIC_CHECKLIST = [
    {
        "category": "🏗️ General Agent Architecture",
        "checks": [
            ("Tooling: Does the agent use structured tool calling?", "Essential for reliable interactions."),
            ("Orchestration: Is there a clear reason-act loop?", "Ensures agentic behavior."),
            ("Observability: Are traces/logs being captured?", "Critical for debugging production agents.")
        ]
    },
    {
        "category": "🛡️ Security",
        "checks": [
            ("Sandbox: Are tools running in an isolated environment?", "Protects the host system."),
            ("Input Validation: Are tool arguments validated?", "Prevents local execution attacks.")
        ]
    }
]
232
+
233
# Review checklist for Firebase-hosted projects.
# Shape: list of {"category": str, "checks": [(question, rationale), ...]}.
FIREBASE_CHECKLIST = [
    {
        "category": "🏗️ Firebase Infrastructure",
        "checks": [
            ("Hosting: Are security headers (HSTS, CSP) configured in firebase.json?", "Prevents cross-site scripting and hijacking."),
            ("Firestore: Are composite indexes used for complex agent queries?", "Ensures high-performance data retrieval for RAG."),
            ("Functions: Is 'Minimum Instances' set for critical agent tools?", "Reduces cold-start latency for backend tool execution."),
            ("Rules: Are security rules locked down to 'request.auth'?", "Prevents unauthorized database access.")
        ]
    }
]
244
+
245
+
246
+ # --- MULTI-LANGUAGE / FRONTEND CHECKLISTS ---
247
+
248
# Review checklist for Streamlit front ends.
# Shape: list of {"category": str, "checks": [(question, rationale), ...]}.
STREAMLIT_CHECKLIST = [
    {
        "category": "🏗️ Streamlit Architecture",
        "checks": [
            ("State Management: Using st.session_state for agent history?", "Critical for maintaining context in stateful agents."),
            ("Async: Are long-running agent calls wrapped in st.spinner?", "Improves UX by providing immediate feedback."),
            ("Secrets: Using .streamlit/secrets.toml instead of hardcoding?", "Standard for secure key management in Streamlit.")
        ]
    }
]
258
+
259
# Review checklist for Lit web-component front ends.
# Shape: list of {"category": str, "checks": [(question, rationale), ...]}.
LIT_CHECKLIST = [
    {
        "category": "🏗️ Lit Web Components",
        "checks": [
            ("Protocol: Is A2UI BaseElement used for styling isolation?", "Ensures components work across different host apps."),
            # Rationale text corrected: the Lit base class is "LitElement",
            # not "Lite-element".
            ("Reactivity: Are agent updates handled via @property decorator?", "Standard for efficient LitElement updates."),
            ("Shadow DOM: Are styles encapsulated to avoid platform leaking?", "Critical for distributing agent widgets to 3rd party sites.")
        ]
    }
]
269
+
270
# Review checklist for Angular front ends.
# Shape: list of {"category": str, "checks": [(question, rationale), ...]}.
ANGULAR_CHECKLIST = [
    {
        "category": "🏗️ Angular Enterprise Face",
        "checks": [
            ("Signals: Using Angular Signals for real-time agent updates?", "Modern reactive pattern for low-latency UIs."),
            ("Interceptors: Is there a global error handler for Agent API timeouts?", "Ensures graceful degradation when LLMs are slow."),
            ("DI: Is the Agent Engine abstracted as a Service?", "Promotes testability and clean architecture.")
        ]
    }
]
280
+
281
# Review checklist for NodeJS / TypeScript back ends.
# Shape: list of {"category": str, "checks": [(question, rationale), ...]}.
NODEJS_CHECKLIST = [
    {
        "category": "🏗️ NodeJS / TypeScript Engine",
        "checks": [
            ("Runtime: Using Bun or Node 20+ for native fetch?", "Optimizes performance for high-frequency API calls."),
            ("Security: Is Helmet middleware active in the Face API?", "Hardens the Express/Hono server against common attacks."),
            ("Types: Are Zod/Pydantic-like schemas used for tool outputs?", "Ensures type-safety across the agent-tool boundary.")
        ]
    }
]
291
+
292
# Review checklist for Go back ends.
# Shape: list of {"category": str, "checks": [(question, rationale), ...]}.
GO_CHECKLIST = [
    {
        "category": "🏗️ Go High-Perf Engine",
        "checks": [
            ("Concurrency: Using Goroutines for parallel tool execution?", "Leverages Go's performance for multi-agent orchestration."),
            ("Validation: Using struct tags for JSON schema enforcement?", "Standard for ensuring engine-face protocol compatibility."),
            ("Tracing: Using OpenTelemetry for multi-hop agent traces?", "Mandatory for observability in complex Go agents.")
        ]
    }
]
302
+
303
+
304
# Registry of supported frameworks, keyed by a short identifier.
# Each entry carries a display "name", the "checklist" to apply, and a list of
# regex "indicators". detect_framework() searches these case-insensitively and
# returns the first framework (in dict order) with any matching indicator, so
# entry order is significant.
#
# Short indicators are anchored with \b so they cannot match inside unrelated
# words (e.g. "st." inside "test.", "opus" inside "corpus", "node" inside
# "EvidenceNode", "express" inside "expression").
FRAMEWORKS = {
    "google": {
        "name": "Google Vertex AI / ADK",
        "checklist": GOOGLE_CHECKLIST,
        "indicators": [r"google-cloud-aiplatform", r"vertexai", r"\badk\b", r"Google Cloud"]
    },
    "openai": {
        "name": "OpenAI / Agentkit",
        "checklist": OPENAI_CHECKLIST,
        # "Assistants? API" accepts both the singular spelling and the actual
        # product name "Assistants API".
        "indicators": [r"openai", r"gpt-", r"Agentkit", r"Assistants? API"]
    },
    "anthropic": {
        "name": "Anthropic Claude / SDK",
        "checklist": ANTHROPIC_CHECKLIST,
        "indicators": [r"anthropic", r"claude", r"\bsonnet\b", r"\bopus\b", r"\bhaiku\b"]
    },
    "microsoft": {
        "name": "Microsoft Agent Framework / AutoGen",
        "checklist": MICROSOFT_CHECKLIST,
        "indicators": [r"autogen", r"semantic-kernel", r"microsoft-agent", r"TypeChat"]
    },
    "aws": {
        "name": "AWS Bedrock Agents",
        "checklist": AWS_CHECKLIST,
        "indicators": [r"boto3", r"bedrock", r"aws-sdk", r"ActionGroup"]
    },
    "copilotkit": {
        "name": "CopilotKit.ai",
        "checklist": COPILOTKIT_CHECKLIST,
        "indicators": [r"copilotkit", r"Guardrails_c", r"CopilotSidebar"]
    },
    "langchain": {
        "name": "LangChain / LangGraph",
        "checklist": LANGCHAIN_CHECKLIST,
        "indicators": [r"langchain", r"langgraph", r"stategraph", r"checkpointer"]
    },
    "streamlit": {
        "name": "Streamlit (Python)",
        "checklist": STREAMLIT_CHECKLIST,
        "indicators": [r"streamlit", r"\bst\.", r"st_chat_message"]
    },
    "lit": {
        "name": "Lit / Web Components",
        "checklist": LIT_CHECKLIST,
        "indicators": [r"lit-element", r"lit-html", r"@customElement"]
    },
    "angular": {
        "name": "Angular Face",
        "checklist": ANGULAR_CHECKLIST,
        "indicators": [r"@angular/core", r"NgModule", r"RxJS"]
    },
    "nodejs": {
        "name": "NodeJS / TypeScript Engine",
        "checklist": NODEJS_CHECKLIST,
        "indicators": [r"package\.json", r"\bnpm\b", r"\bnode(?:js)?\b", r"\bexpress\b", r"\bhono\b"]
    },
    "go": {
        "name": "Go High-Perf Engine",
        "checklist": GO_CHECKLIST,
        "indicators": [r"go\.mod", r"goroutine", r"golang"]
    },
    "firebase": {
        "name": "Firebase / Google Cloud Hosting",
        "checklist": FIREBASE_CHECKLIST,
        "indicators": [r"firebase\.json", r"\.firebaserc", r"firestore"]
    },
    # Fallback entry: no indicators, so it is never selected by matching.
    "generic": {
        "name": "Generic Agentic Stack",
        "checklist": GENERIC_CHECKLIST,
        "indicators": []
    }
}
380
+
381
+
382
def detect_framework(path: str = ".", frameworks: Any = None) -> str:
    """Detect the framework used by the project located at ``path``.

    The text of ``README.md`` and of the known manifest files found directly
    under ``path`` is concatenated (manifest file names are included too, so
    the mere presence of e.g. ``go.mod`` counts) and searched
    case-insensitively for each framework's regex indicators.

    Args:
        path: Directory to inspect.
        frameworks: Optional registry mapping framework key ->
            ``{"indicators": [regex, ...], ...}``. Defaults to the module
            level ``FRAMEWORKS`` registry.

    Returns:
        The key of the first framework (in registry order) with a matching
        indicator, or ``"generic"`` when nothing matches.
    """
    registry = FRAMEWORKS if frameworks is None else frameworks

    def _read(file_path: str) -> Any:
        # Decode explicitly as UTF-8 and replace undecodable bytes so a README
        # containing emoji, or a stray binary byte, cannot crash detection on
        # platforms whose default encoding is not UTF-8. Returns None when the
        # file is missing or unreadable.
        try:
            with open(file_path, "r", encoding="utf-8", errors="replace") as fh:
                return fh.read()
        except OSError:
            return None

    parts = []

    readme_text = _read(os.path.join(path, "README.md"))
    if readme_text is not None:
        parts.append(readme_text)

    manifest_names = [
        "requirements.txt",
        "pyproject.toml",
        "package.json",
        "go.mod",
        "firebase.json",
        ".firebaserc",
    ]
    for filename in manifest_names:
        manifest_text = _read(os.path.join(path, filename))
        if manifest_text is not None:
            # The file name itself is part of the searchable text.
            parts.append(f" {filename} ")
            parts.append(manifest_text)

    content = "".join(parts)

    # First matching framework wins; registry order defines priority.
    for framework, data in registry.items():
        for indicator in data["indicators"]:
            if re.search(indicator, content, re.IGNORECASE):
                return framework

    return "generic"
@@ -0,0 +1,35 @@
1
+ from typing import List, Dict, Any
2
+ import asyncio
3
+
4
class MCPHub:
    """
    Model Context Protocol (MCP) Hub.

    Holds a small registry of tools (name -> type/provider/status) and offers
    a simulated asynchronous ``execute_tool`` entry point.
    """

    def __init__(self):
        # Tool name -> static configuration record.
        self.registry = dict(
            search={"type": "mcp", "provider": "google-search", "status": "optimized"},
            db={"type": "mcp", "provider": "alloydb-vector", "status": "optimized"},
            legacy_crm={"type": "rest_api", "provider": "internal", "status": "deprecated"},
        )

    async def execute_tool(self, tool_name: str, args: Dict[str, Any]):
        """
        Run the named tool and return a result record.

        Prints a migration warning first when the tool's status is
        "deprecated", then prints the protocol in use. Execution itself is
        simulated with a 0.1 second sleep; ``args`` is accepted but not used.

        Raises:
            ValueError: if ``tool_name`` is not in the registry.
        """
        try:
            entry = self.registry[tool_name]
        except KeyError:
            raise ValueError(f"Tool {tool_name} not found in MCP Registry.") from None

        is_legacy = entry["status"] == "deprecated"
        if is_legacy:
            print(f"⚠️ WARNING: Using legacy Tool API for '{tool_name}'. Migrate to MCP for 30% lower latency.")

        protocol = entry["type"]
        print(f"🛠️ Executing tool '{tool_name}' via {protocol} protocol...")
        await asyncio.sleep(0.1)  # Simulating execution

        outcome = {"result": f"Data from {tool_name}", "protocol": protocol}
        return outcome


# Shared module-level hub instance.
global_mcp_hub = MCPHub()
@@ -0,0 +1,44 @@
1
+ import time
2
+ from typing import Dict, Any, List
3
+
4
class MemoryOptimizer:
    """
    Bounded, time-limited store for agent memory events.

    Keeps at most ``max_items`` events (evicting the oldest by timestamp) and
    hides events older than ``ttl_seconds`` when building context. Helps
    prevent 'Large System Instruction' bloat over long conversations.
    """

    def __init__(self, max_items: int = 50, ttl_seconds: int = 3600):
        self.max_items = max_items
        self.ttl_seconds = ttl_seconds
        # event_id -> {"data": <payload>, "timestamp": <time.time() float>}
        self.memory: Dict[str, Dict[str, Any]] = {}

    def add_event(self, event_id: str, data: Any):
        """Store ``data`` under ``event_id`` stamped with the current time.

        Re-adding an existing ``event_id`` replaces that entry and refreshes
        its timestamp without evicting any other entry. When the store is
        full, the entries with the oldest timestamps are evicted to make
        room. With ``max_items <= 0`` nothing can be retained, so the call is
        a no-op.
        """
        if self.max_items <= 0:
            return

        # Drop any previous version first: an update must not count against
        # capacity, and re-inserting keeps dict order aligned with recency.
        self.memory.pop(event_id, None)

        # Loop (rather than a single eviction) so the bound also holds if
        # max_items was lowered after entries were added.
        while len(self.memory) >= self.max_items:
            oldest_key = min(self.memory, key=lambda k: self.memory[k]["timestamp"])
            del self.memory[oldest_key]

        self.memory[event_id] = {
            "data": data,
            "timestamp": time.time(),
        }

    def get_optimized_context(self) -> List[Any]:
        """Return payloads of unexpired events, ordered oldest to newest.

        An event is unexpired while its age is strictly less than
        ``ttl_seconds``. Expired events are skipped but not removed.
        """
        current_time = time.time()
        live_entries = [
            entry for entry in self.memory.values()
            if (current_time - entry["timestamp"]) < self.ttl_seconds
        ]
        # Stable sort: entries with equal timestamps keep insertion order.
        live_entries.sort(key=lambda entry: entry["timestamp"])
        return [entry["data"] for entry in live_entries]

    def compress_summaries(self, items: List[str]) -> str:
        """
        Placeholder for LLM-based summarization to compress memory.

        Currently returns only a fixed-format string reporting how many items
        were passed in; no summarization is performed.
        """
        return f"Summary of {len(items)} items..."


# Global Instance for the Cockpit
agent_memory_manager = MemoryOptimizer(max_items=20, ttl_seconds=1800)
@@ -0,0 +1,103 @@
1
+ import os
2
+ from datetime import datetime
3
+ from rich.console import Console
4
+ from rich.panel import Panel
5
+ from rich.table import Table
6
+
7
+ # Import from package namespace
8
+ from agent_ops_cockpit.ops import arch_review, reliability, secret_scanner, ui_auditor
9
+ from agent_ops_cockpit.eval import quality_climber, red_team
10
+ from agent_ops_cockpit import optimizer
11
+
12
+ console = Console()
13
+
14
class CockpitOrchestrator:
    """
    Main orchestrator for AgentOps audits.

    Runs individual audit steps, records a success/failure result for each,
    and writes a Markdown summary report.
    """

    def __init__(self):
        # Wall-clock creation time, shown in the report header.
        self.timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        # Report is written relative to the current working directory.
        self.report_path = "cockpit_final_report.md"
        # Step name -> {"success": bool, "output": str}
        self.results = {}

    def run_step(self, name: str, func, *args, **kwargs):
        """Run one audit callable and record whether it raised.

        Args:
            name: Label used in console output and as the results key.
            func: Callable to execute; its return value is ignored.
            *args, **kwargs: Forwarded to ``func`` unchanged.

        Any ``Exception`` raised by ``func`` is caught, reported on the
        console and stored as a failed result, so later steps still run.
        """
        console.print(f"\n🚀 [bold]Step: {name}[/bold]")
        try:
            # Capturing output for internal functions is harder without redirecting stdout
            # For now, we just execute them for the effect and note success
            func(*args, **kwargs)
            self.results[name] = {
                "success": True,
                "output": f"Audit {name} executed successfully."
            }
            console.print(f"✅ {name} Completed.")
        except Exception as e:
            self.results[name] = {"success": False, "output": str(e)}
            console.print(f"❌ {name} Failed: {e}")

    def generate_report(self):
        """Print a summary table and write the Markdown report to disk.

        Overall status is PASS only when every recorded step succeeded.
        The report is written to ``self.report_path`` as UTF-8.
        """
        report = [
            "# 🏁 AgentOps Cockpit: Final Audit Report",
            f"**Timestamp**: {self.timestamp}",
            f"**Status**: {'PASS' if all(r['success'] for r in self.results.values()) else 'FAIL'}",
            "\n---",
            "\n## 📊 Executive Summary"
        ]

        summary_table = Table(show_header=True, header_style="bold magenta")
        summary_table.add_column("Audit Type")
        summary_table.add_column("Status")

        for name, data in self.results.items():
            status = "✅ PASS" if data["success"] else "❌ FAIL"
            summary_table.add_row(name, status)
            report.append(f"- **{name}**: {status}")

        console.print("\n", summary_table)

        report.append("\n## 🔍 Detailed Findings")
        for name, data in self.results.items():
            report.append(f"\n### {name}")
            report.append(data["output"])

        report.append("\n---")
        report.append("\n*Generated by the AgentOps Cockpit Orchestrator.*")

        # The report contains emoji; an explicit UTF-8 encoding avoids a
        # UnicodeEncodeError on platforms whose default encoding cannot
        # represent them (e.g. cp1252 on Windows).
        with open(self.report_path, "w", encoding="utf-8") as f:
            f.write("\n".join(report))

        console.print(f"\n✨ [bold green]Final Report generated at {self.report_path}[/bold green]")
72
+
73
def run_full_audit():
    """Run every audit step in a fixed order, then generate the final report.

    Steps are executed through ``CockpitOrchestrator.run_step``, which records
    the outcome of each one.
    """
    orchestrator = CockpitOrchestrator()

    banner = Panel.fit(
        "🕹️ [bold blue]AGENTOPS COCKPIT: FULL SYSTEM AUDIT[/bold blue]\nLaunching all governance and optimization modules...",
        border_style="blue"
    )
    console.print(banner)

    agent_file = "src/backend/agent.py"

    # (step label, module, entry-point attribute, keyword arguments).
    # The entry point is looked up just before its step runs.
    audit_plan = (
        # 1. Architecture Review
        ("Architecture Review", arch_review, "audit", {"path": "."}),
        # 2. Quality Baseline
        ("Quality Baseline", quality_climber, "audit", {"path": "."}),
        # 3. Security & Secrets
        ("Secret Scanner (Leak Detection)", secret_scanner, "scan", {"path": "."}),
        ("Adversarial Security (Red Team)", red_team, "audit", {"agent_path": agent_file}),
        # 4. Face (UI/UX) Audit
        ("UI/UX Quality (Face Auditor)", ui_auditor, "audit", {"path": "src"}),
        # 5. Token Optimization Audit
        ("Token Optimization Audit", optimizer, "audit", {"file_path": agent_file, "interactive": False}),
        # 6. Reliability Audit (Unit + Regression)
        ("Reliability (Unit + Regression)", reliability, "run_tests", {}),
    )

    for label, module, entry_point, options in audit_plan:
        orchestrator.run_step(label, getattr(module, entry_point), **options)

    orchestrator.generate_report()


if __name__ == "__main__":
    run_full_audit()
@@ -0,0 +1,47 @@
1
+ import re
2
+ from typing import Dict, Any, List
3
+ from rich.console import Console
4
+
5
+ console = Console()
6
+
7
class PIIScrubber:
    """
    Standard AgentOps PII Scrubber.

    Detects sensitive substrings with regular expressions and replaces each
    match with a ``[[MASKED_<LABEL>]]`` placeholder.
    """

    # Label -> regex. Patterns are applied in this order by ``scrub``.
    PATTERNS = {
        "EMAIL": r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}",
        # Starts with (?<!\w) rather than \b: \b cannot match directly before
        # "(" or "+", which left the leading "(" / "+" of numbers such as
        # "(415) 555-1234" or "+1 415 555 1234" unmasked.
        "PHONE": r"(?<!\w)(?:\+?1[-. ]?)?\(?([2-9][0-8][0-9])\)?[-. ]?([2-9][0-9]{2})[-. ]?([0-9]{4})\b",
        "CREDIT_CARD": r"\b(?:\d{4}[ -]?){3}\d{4}\b",
        "SSN": r"\b\d{3}-\d{2}-\d{4}\b",
        "IPV4": r"\b(?:\d{1,3}\.){3}\d{1,3}\b"
    }

    def __init__(self, enabled: bool = True):
        # When False, ``scrub`` returns its input unchanged.
        self.enabled = enabled

    def scrub(self, text: str) -> str:
        """Return ``text`` with every pattern match replaced by its mask.

        Returns ``text`` untouched when the scrubber is disabled.
        """
        if not self.enabled:
            return text

        scrubbed_text = text
        for label, pattern in self.PATTERNS.items():
            scrubbed_text = re.sub(pattern, f"[[MASKED_{label}]]", scrubbed_text)

        return scrubbed_text

    def audit_report(self, text: str) -> Dict[str, Any]:
        """Count matches per label without modifying ``text``.

        Returns a dict of label -> number of matches, containing only labels
        with at least one match. Runs regardless of ``enabled``.
        """
        findings = {}
        for label, pattern in self.PATTERNS.items():
            matches = re.findall(pattern, text)
            if matches:
                findings[label] = len(matches)
        return findings
43
+
44
def agent_pii_middleware(prompt: str) -> str:
    """Scrub ``prompt`` with a freshly created, default ``PIIScrubber``."""
    return PIIScrubber().scrub(prompt)