agentops-cockpit 0.2.2__tar.gz → 0.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100)
  1. agentops_cockpit-0.4.0/A2A_GUIDE.md +58 -0
  2. {agentops_cockpit-0.2.2 → agentops_cockpit-0.4.0}/PKG-INFO +37 -10
  3. {agentops_cockpit-0.2.2 → agentops_cockpit-0.4.0}/README.md +35 -9
  4. agentops_cockpit-0.4.0/public/og-image.png +0 -0
  5. {agentops_cockpit-0.2.2 → agentops_cockpit-0.4.0}/pyproject.toml +2 -1
  6. {agentops_cockpit-0.2.2 → agentops_cockpit-0.4.0}/src/agent_ops_cockpit/cli/main.py +22 -18
  7. agentops_cockpit-0.4.0/src/agent_ops_cockpit/ops/arch_review.py +100 -0
  8. agentops_cockpit-0.4.0/src/agent_ops_cockpit/ops/mcp_hub.py +80 -0
  9. agentops_cockpit-0.4.0/src/agent_ops_cockpit/ops/orchestrator.py +103 -0
  10. agentops_cockpit-0.4.0/src/agent_ops_cockpit/ops/reliability.py +50 -0
  11. agentops_cockpit-0.4.0/src/agent_ops_cockpit/optimizer.py +263 -0
  12. agentops_cockpit-0.4.0/src/agent_ops_cockpit/shadow/__init__.py +0 -0
  13. agentops_cockpit-0.4.0/src/backend/__init__.py +0 -0
  14. agentops_cockpit-0.4.0/src/backend/cache/semantic_cache.py +59 -0
  15. agentops_cockpit-0.4.0/src/backend/cost_control.py +53 -0
  16. agentops_cockpit-0.4.0/src/backend/eval/__init__.py +1 -0
  17. agentops_cockpit-0.4.0/src/backend/eval/load_test.py +91 -0
  18. agentops_cockpit-0.4.0/src/backend/eval/quality_climber.py +129 -0
  19. agentops_cockpit-0.4.0/src/backend/eval/red_team.py +72 -0
  20. agentops_cockpit-0.4.0/src/backend/ops/__init__.py +1 -0
  21. agentops_cockpit-0.4.0/src/backend/ops/cost_optimizer.py +40 -0
  22. agentops_cockpit-0.4.0/src/backend/ops/evidence.py +25 -0
  23. agentops_cockpit-0.4.0/src/backend/ops/frameworks.py +407 -0
  24. agentops_cockpit-0.4.0/src/backend/ops/mcp_hub.py +80 -0
  25. agentops_cockpit-0.4.0/src/backend/ops/memory_optimizer.py +44 -0
  26. agentops_cockpit-0.4.0/src/backend/ops/pii_scrubber.py +47 -0
  27. agentops_cockpit-0.4.0/src/backend/ops/secret_scanner.py +75 -0
  28. agentops_cockpit-0.4.0/src/backend/ops/ui_auditor.py +120 -0
  29. agentops_cockpit-0.4.0/src/backend/shadow/router.py +75 -0
  30. {agentops_cockpit-0.2.2 → agentops_cockpit-0.4.0}/src/components/Home.tsx +3 -2
  31. agentops_cockpit-0.2.2/A2A_GUIDE.md +0 -39
  32. agentops_cockpit-0.2.2/src/backend/ops/mcp_hub.py +0 -35
  33. {agentops_cockpit-0.2.2 → agentops_cockpit-0.4.0}/.gitignore +0 -0
  34. {agentops_cockpit-0.2.2 → agentops_cockpit-0.4.0}/BE_INTEGRATION_GUIDE.md +0 -0
  35. {agentops_cockpit-0.2.2 → agentops_cockpit-0.4.0}/DEPLOYMENT.md +0 -0
  36. {agentops_cockpit-0.2.2 → agentops_cockpit-0.4.0}/Dockerfile +0 -0
  37. {agentops_cockpit-0.2.2 → agentops_cockpit-0.4.0}/LICENSE +0 -0
  38. {agentops_cockpit-0.2.2 → agentops_cockpit-0.4.0}/Makefile +0 -0
  39. {agentops_cockpit-0.2.2 → agentops_cockpit-0.4.0}/firebase.json +0 -0
  40. {agentops_cockpit-0.2.2 → agentops_cockpit-0.4.0}/index.html +0 -0
  41. {agentops_cockpit-0.2.2 → agentops_cockpit-0.4.0}/package.json +0 -0
  42. {agentops_cockpit-0.2.2 → agentops_cockpit-0.4.0}/public/A2A_GUIDE.md +0 -0
  43. {agentops_cockpit-0.2.2 → agentops_cockpit-0.4.0}/public/AGENT_OPS_STORY.md +0 -0
  44. {agentops_cockpit-0.2.2 → agentops_cockpit-0.4.0}/public/BE_INTEGRATION_GUIDE.md +0 -0
  45. {agentops_cockpit-0.2.2 → agentops_cockpit-0.4.0}/public/CLI_COMMANDS.md +0 -0
  46. {agentops_cockpit-0.2.2 → agentops_cockpit-0.4.0}/public/DEPLOYMENT.md +0 -0
  47. {agentops_cockpit-0.2.2 → agentops_cockpit-0.4.0}/public/DEVELOPMENT.md +0 -0
  48. {agentops_cockpit-0.2.2 → agentops_cockpit-0.4.0}/public/GEMINI.md +0 -0
  49. {agentops_cockpit-0.2.2 → agentops_cockpit-0.4.0}/public/GETTING_STARTED.md +0 -0
  50. {agentops_cockpit-0.2.2 → agentops_cockpit-0.4.0}/public/README.md +0 -0
  51. {agentops_cockpit-0.2.2 → agentops_cockpit-0.4.0}/public/cicd-workflow.png +0 -0
  52. {agentops_cockpit-0.2.2 → agentops_cockpit-0.4.0}/public/diagrams/agentic-stack.png +0 -0
  53. {agentops_cockpit-0.2.2 → agentops_cockpit-0.4.0}/public/diagrams/value-proposition.png +0 -0
  54. {agentops_cockpit-0.2.2 → agentops_cockpit-0.4.0}/public/hero.png +0 -0
  55. {agentops_cockpit-0.2.2 → agentops_cockpit-0.4.0}/src/App.tsx +0 -0
  56. {agentops_cockpit-0.2.2 → agentops_cockpit-0.4.0}/src/a2ui/A2UIRenderer.tsx +0 -0
  57. {agentops_cockpit-0.2.2 → agentops_cockpit-0.4.0}/src/a2ui/components/index.tsx +0 -0
  58. {agentops_cockpit-0.2.2 → agentops_cockpit-0.4.0}/src/a2ui/components/lit-component-example.ts +0 -0
  59. {agentops_cockpit-0.2.2 → agentops_cockpit-0.4.0}/src/a2ui/types.ts +0 -0
  60. {agentops_cockpit-0.2.2 → agentops_cockpit-0.4.0}/src/agent_ops_cockpit/__init__.py +0 -0
  61. {agentops_cockpit-0.2.2/src/agent_ops_cockpit/cli → agentops_cockpit-0.4.0/src/agent_ops_cockpit/cache}/__init__.py +0 -0
  62. {agentops_cockpit-0.2.2/src/backend → agentops_cockpit-0.4.0/src/agent_ops_cockpit}/cache/semantic_cache.py +0 -0
  63. {agentops_cockpit-0.2.2/src/backend → agentops_cockpit-0.4.0/src/agent_ops_cockpit/cli}/__init__.py +0 -0
  64. {agentops_cockpit-0.2.2/src/backend → agentops_cockpit-0.4.0/src/agent_ops_cockpit}/cost_control.py +0 -0
  65. {agentops_cockpit-0.2.2/src/backend → agentops_cockpit-0.4.0/src/agent_ops_cockpit}/eval/__init__.py +0 -0
  66. {agentops_cockpit-0.2.2/src/backend → agentops_cockpit-0.4.0/src/agent_ops_cockpit}/eval/load_test.py +0 -0
  67. {agentops_cockpit-0.2.2/src/backend → agentops_cockpit-0.4.0/src/agent_ops_cockpit}/eval/quality_climber.py +0 -0
  68. {agentops_cockpit-0.2.2/src/backend → agentops_cockpit-0.4.0/src/agent_ops_cockpit}/eval/red_team.py +0 -0
  69. {agentops_cockpit-0.2.2/src/backend → agentops_cockpit-0.4.0/src/agent_ops_cockpit}/ops/__init__.py +0 -0
  70. {agentops_cockpit-0.2.2/src/backend → agentops_cockpit-0.4.0/src/agent_ops_cockpit}/ops/cost_optimizer.py +0 -0
  71. {agentops_cockpit-0.2.2/src/backend → agentops_cockpit-0.4.0/src/agent_ops_cockpit}/ops/evidence.py +0 -0
  72. {agentops_cockpit-0.2.2/src/backend → agentops_cockpit-0.4.0/src/agent_ops_cockpit}/ops/frameworks.py +0 -0
  73. {agentops_cockpit-0.2.2/src/backend → agentops_cockpit-0.4.0/src/agent_ops_cockpit}/ops/memory_optimizer.py +0 -0
  74. {agentops_cockpit-0.2.2/src/backend → agentops_cockpit-0.4.0/src/agent_ops_cockpit}/ops/pii_scrubber.py +0 -0
  75. {agentops_cockpit-0.2.2/src/backend → agentops_cockpit-0.4.0/src/agent_ops_cockpit}/ops/secret_scanner.py +0 -0
  76. {agentops_cockpit-0.2.2/src/backend → agentops_cockpit-0.4.0/src/agent_ops_cockpit}/ops/ui_auditor.py +0 -0
  77. {agentops_cockpit-0.2.2/src/backend → agentops_cockpit-0.4.0/src/agent_ops_cockpit}/shadow/router.py +0 -0
  78. {agentops_cockpit-0.2.2 → agentops_cockpit-0.4.0}/src/backend/agent.py +0 -0
  79. {agentops_cockpit-0.2.2 → agentops_cockpit-0.4.0}/src/backend/ops/arch_review.py +0 -0
  80. {agentops_cockpit-0.2.2 → agentops_cockpit-0.4.0}/src/backend/ops/orchestrator.py +0 -0
  81. {agentops_cockpit-0.2.2 → agentops_cockpit-0.4.0}/src/backend/ops/reliability.py +0 -0
  82. {agentops_cockpit-0.2.2 → agentops_cockpit-0.4.0}/src/backend/optimizer.py +0 -0
  83. {agentops_cockpit-0.2.2 → agentops_cockpit-0.4.0}/src/backend/tests/test_agent.py +0 -0
  84. {agentops_cockpit-0.2.2 → agentops_cockpit-0.4.0}/src/backend/tests/test_arch_review.py +0 -0
  85. {agentops_cockpit-0.2.2 → agentops_cockpit-0.4.0}/src/backend/tests/test_frameworks.py +0 -0
  86. {agentops_cockpit-0.2.2 → agentops_cockpit-0.4.0}/src/backend/tests/test_optimizer.py +0 -0
  87. {agentops_cockpit-0.2.2 → agentops_cockpit-0.4.0}/src/backend/tests/test_quality_climber.py +0 -0
  88. {agentops_cockpit-0.2.2 → agentops_cockpit-0.4.0}/src/backend/tests/test_red_team.py +0 -0
  89. {agentops_cockpit-0.2.2 → agentops_cockpit-0.4.0}/src/backend/tests/test_secret_scanner.py +0 -0
  90. {agentops_cockpit-0.2.2 → agentops_cockpit-0.4.0}/src/components/FlightRecorder.tsx +0 -0
  91. {agentops_cockpit-0.2.2 → agentops_cockpit-0.4.0}/src/components/OpsDashboard.tsx +0 -0
  92. {agentops_cockpit-0.2.2 → agentops_cockpit-0.4.0}/src/components/Playground.tsx +0 -0
  93. {agentops_cockpit-0.2.2 → agentops_cockpit-0.4.0}/src/components/ThemeToggle.tsx +0 -0
  94. {agentops_cockpit-0.2.2 → agentops_cockpit-0.4.0}/src/docs/DocLayout.tsx +0 -0
  95. {agentops_cockpit-0.2.2 → agentops_cockpit-0.4.0}/src/docs/DocPage.tsx +0 -0
  96. {agentops_cockpit-0.2.2 → agentops_cockpit-0.4.0}/src/index.css +0 -0
  97. {agentops_cockpit-0.2.2 → agentops_cockpit-0.4.0}/src/knowledge/example_policy.txt +0 -0
  98. {agentops_cockpit-0.2.2 → agentops_cockpit-0.4.0}/src/main.tsx +0 -0
  99. {agentops_cockpit-0.2.2 → agentops_cockpit-0.4.0}/tsconfig.json +0 -0
  100. {agentops_cockpit-0.2.2 → agentops_cockpit-0.4.0}/vite.config.ts +0 -0
@@ -0,0 +1,58 @@
1
+ # 📡 Agent-to-Agent (A2A) Transmission Standard
2
+
3
+ Building a single agent is easy. Building a **Swarm** of agents that communicate securely and efficiently is the next frontier of AgentOps. The Cockpit implements the **A2A Transmission Standard** to ensure that your "Agent Trinity" remains Well-Architected.
4
+
5
+ ## 🏛️ The A2A Protocol Stack
6
+
7
+ | Layer | Responsibility | Protocol / Spec |
8
+ | :--- | :--- | :--- |
9
+ | **Surface** | Human-Agent Interaction | [A2UI Spec](/docs/a2ui) |
10
+ | **Memory** | Cross-Agent Knowledge | [Vector Workspace (Hive Mind)](/src/backend/cache) |
11
+ | **Logic** | Tool & Reasoning Handshake | [A2P Handshake](#a2p-handshake) |
12
+ | **Security** | Identity & Permissions | [GCP Workload Identity](https://cloud.google.com/kubernetes-engine/docs/how-to/workload-identity) |
13
+
14
+ ---
15
+
16
+ ## 🤝 The A2P Handshake (Agent-to-Proxy)
17
+
18
+ When one agent calls another tool, it shouldn't just send raw text. It must send a **Reasoning Evidence Packet**.
19
+
20
+ ### ❌ The "Old" Way (Brittle)
21
+ ```json
22
+ {
23
+ "query": "What is the budget?",
24
+ "output": "The budget is $500k."
25
+ }
26
+ ```
27
+
28
+ ### ✅ The "Cockpit" Way (Well-Architected)
29
+ ```json
30
+ {
31
+ "trace_id": "tr-9942-x",
32
+ "reasoning_path": ["Fetch Schema", "Query BigQuery", "Apply PIIScrubber"],
33
+ "evidence": [
34
+ { "source": "bq://finance.budget_2026", "assurance_score": 0.98 }
35
+ ],
36
+ "content": {
37
+ "text": "The approved budget is $500k.",
38
+ "a2ui_surface": "DynamicBudgetChart"
39
+ }
40
+ }
41
+ ```
42
+
43
+ ## 🛡️ Governance-as-Code for Swarms
44
+
45
+ On the Cockpit, every A2A transmission is automatically:
46
+ 1. **Scrubbed**: PII is removed before leaving the Engine's VPC.
47
+ 2. **Cached**: Similar cross-agent queries hit the **Hive Mind** instead of expensive LLM reasoning.
48
+ 3. **Audited**: The `arch-review` tool verifies that your multi-agent graph doesn't have "Shadow Loops" (recursive infinite spend).
49
+
50
+ ---
51
+
52
+ ## ⚡ Get Started with A2A
53
+ Use the Cockpit CLI to verify your multi-agent communication:
54
+ ```bash
55
+ agent-ops audit --mode swarm --file multi_agent_entry.py
56
+ ```
57
+
58
+ *This standard is being proposed to the Google Well-Architected Framework for AI Agents committee.*
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: agentops-cockpit
3
- Version: 0.2.2
3
+ Version: 0.4.0
4
4
  Summary: Production-grade Agent Operations (AgentOps) Platform
5
5
  Project-URL: Homepage, https://github.com/enriquekalven/agent-ops-cockpit
6
6
  Project-URL: Bug Tracker, https://github.com/enriquekalven/agent-ops-cockpit/issues
@@ -11,6 +11,7 @@ Classifier: Operating System :: OS Independent
11
11
  Classifier: Programming Language :: Python :: 3
12
12
  Requires-Python: >=3.10
13
13
  Requires-Dist: gitpython>=3.1.0
14
+ Requires-Dist: mcp>=0.1.0
14
15
  Requires-Dist: rich>=13.0.0
15
16
  Requires-Dist: typer>=0.9.0
16
17
  Description-Content-Type: text/markdown
@@ -18,10 +19,20 @@ Description-Content-Type: text/markdown
18
19
  # 🕹️ AgentOps Cockpit
19
20
 
20
21
  <div align="center">
22
+ <img src="https://raw.githubusercontent.com/enriquekalven/agent-cockpit/main/public/og-image.png" alt="AgentOps Cockpit Social Preview" width="100%" />
23
+ </div>
24
+
25
+ <div align="center">
26
+ <br />
27
+ <a href="https://deploy.cloud.google.com?repo=https://github.com/enriquekalven/agent-cockpit">
28
+ <img src="https://deploy.cloud.google.com/button.svg" alt="Deploy to Google Cloud" />
29
+ </a>
30
+ <br />
31
+ <br />
21
32
  <img src="https://img.shields.io/github/stars/enriquekalven/agent-cockpit?style=for-the-badge&color=ffd700" alt="GitHub Stars" />
22
33
  <img src="https://img.shields.io/github/license/enriquekalven/agent-cockpit?style=for-the-badge&color=007bff" alt="License" />
23
34
  <img src="https://img.shields.io/badge/Google-Well--Architected-4285F4?style=for-the-badge&logo=google-cloud" alt="Google Well-Architected" />
24
- <img src="https://img.shields.io/badge/Status-Day%202%20Operations-10b981?style=for-the-badge" alt="Status" />
35
+ <img src="https://img.shields.io/badge/A2A_Standard-Enabled-10b981?style=for-the-badge" alt="A2A Standard" />
25
36
  </div>
26
37
 
27
38
  <br />
@@ -34,7 +45,12 @@ Description-Content-Type: text/markdown
34
45
  ---
35
46
 
36
47
  ## 📽️ The Mission
37
- Most AI agent templates stop at a single Python file and an API key. **The AgentOps Cockpit** is for developers moving into production. While optimized for **ADK**, it provides framework-agnostic governance, safety, and cost guardrails for the entire agentic ecosystem—from CrewAI to LangGraph. Based on the **[Google Well-Architected Framework for Agents](/docs/google-architecture)**.
48
+ Most AI agent templates stop at a single Python file and an API key. **The AgentOps Cockpit** is for developers moving into production. It provides framework-agnostic governance, safety, and cost guardrails for the entire agentic ecosystem.
49
+
50
+ ### Key Pillars:
51
+ - **Governance-as-Code**: Audit your agent against [Google Well-Architected](/docs/google-architecture) best practices.
52
+ - **Agentic Trinity**: Dedicated layers for the Engine (Logic), Face (UX), and Cockpit (Ops).
53
+ - **A2A Connectivity**: Implements the [Agent-to-Agent Transmission Standard](/A2A_GUIDE.md) for secure swarm orchestration.
38
54
 
39
55
  ---
40
56
 
@@ -86,6 +102,9 @@ Don't wait for your users to find prompt injections. Use the built-in Adversaria
86
102
  ### 🏛️ Arch Review & Framework Detection
87
103
  Every agent in the cockpit is graded against a framework-aware checklist. The Cockpit intelligently detects your stack—**Google ADK**, **OpenAI Agentkit**, **Anthropic Claude**, **Microsoft AutoGen/Semantic Kernel**, **AWS Bedrock Agents**, or **CopilotKit**—and runs a tailored audit against corresponding production standards. Use `make arch-review` to verify your **Governance-as-Code**.
88
104
 
105
+ ### 🕹️ MCP Connectivity Hub (Model Context Protocol)
106
+ Stop building one-off tool integrations. The Cockpit provides a unified hub for **MCP Servers**. Connect to Google Search, Slack, or your internal databases via the standardized Model Context Protocol for secure, audited tool execution.
107
+
89
108
  ### 🧗 Quality Hill Climbing (ADK Evaluation)
90
109
  Following **Google ADK Evaluation** best practices, the Cockpit provides an iterative optimization loop. `make quality-baseline` runs your agent against a "Golden Dataset" using **LLM-as-a-Judge** scoring (Response Match & Tool Trajectory), climbing the quality curve until production-grade fidelity is reached.
91
110
 
@@ -93,17 +112,25 @@ Following **Google ADK Evaluation** best practices, the Cockpit provides an iter
93
112
 
94
113
  ## ⌨️ Quick Start
95
114
 
96
- You don't even need to clone the repo to start auditing.
115
+ The Cockpit is available as a first-class CLI on PyPI.
97
116
 
98
117
  ```bash
99
- # 1. Audit your existing agent design
100
- uvx agent-ops-cockpit arch-review
118
+ # 1. Install the Cockpit globally
119
+ pip install agentops-cockpit
101
120
 
102
- # 2. Stress test your endpoint
103
- uvx agent-ops-cockpit load-test --requests 100 --concurrency 10
121
+ # 2. Audit your existing agent design
122
+ agent-ops arch-review
104
123
 
105
- # 3. Scaffold a new Well-Architected app
106
- uvx agent-ops-cockpit create my-agent --ui a2ui
124
+ # 3. Stress test your endpoint
125
+ agent-ops load-test --requests 100 --concurrency 10
126
+
127
+ # 4. Scaffold a new Well-Architected app
128
+ agent-ops create my-agent --ui a2ui
129
+ ```
130
+
131
+ You can also use `uvx` for one-off commands without installation:
132
+ ```bash
133
+ uvx agentops-cockpit arch-review
107
134
  ```
108
135
 
109
136
  ---
@@ -1,10 +1,20 @@
1
1
  # 🕹️ AgentOps Cockpit
2
2
 
3
3
  <div align="center">
4
+ <img src="https://raw.githubusercontent.com/enriquekalven/agent-cockpit/main/public/og-image.png" alt="AgentOps Cockpit Social Preview" width="100%" />
5
+ </div>
6
+
7
+ <div align="center">
8
+ <br />
9
+ <a href="https://deploy.cloud.google.com?repo=https://github.com/enriquekalven/agent-cockpit">
10
+ <img src="https://deploy.cloud.google.com/button.svg" alt="Deploy to Google Cloud" />
11
+ </a>
12
+ <br />
13
+ <br />
4
14
  <img src="https://img.shields.io/github/stars/enriquekalven/agent-cockpit?style=for-the-badge&color=ffd700" alt="GitHub Stars" />
5
15
  <img src="https://img.shields.io/github/license/enriquekalven/agent-cockpit?style=for-the-badge&color=007bff" alt="License" />
6
16
  <img src="https://img.shields.io/badge/Google-Well--Architected-4285F4?style=for-the-badge&logo=google-cloud" alt="Google Well-Architected" />
7
- <img src="https://img.shields.io/badge/Status-Day%202%20Operations-10b981?style=for-the-badge" alt="Status" />
17
+ <img src="https://img.shields.io/badge/A2A_Standard-Enabled-10b981?style=for-the-badge" alt="A2A Standard" />
8
18
  </div>
9
19
 
10
20
  <br />
@@ -17,7 +27,12 @@
17
27
  ---
18
28
 
19
29
  ## 📽️ The Mission
20
- Most AI agent templates stop at a single Python file and an API key. **The AgentOps Cockpit** is for developers moving into production. While optimized for **ADK**, it provides framework-agnostic governance, safety, and cost guardrails for the entire agentic ecosystem—from CrewAI to LangGraph. Based on the **[Google Well-Architected Framework for Agents](/docs/google-architecture)**.
30
+ Most AI agent templates stop at a single Python file and an API key. **The AgentOps Cockpit** is for developers moving into production. It provides framework-agnostic governance, safety, and cost guardrails for the entire agentic ecosystem.
31
+
32
+ ### Key Pillars:
33
+ - **Governance-as-Code**: Audit your agent against [Google Well-Architected](/docs/google-architecture) best practices.
34
+ - **Agentic Trinity**: Dedicated layers for the Engine (Logic), Face (UX), and Cockpit (Ops).
35
+ - **A2A Connectivity**: Implements the [Agent-to-Agent Transmission Standard](/A2A_GUIDE.md) for secure swarm orchestration.
21
36
 
22
37
  ---
23
38
 
@@ -69,6 +84,9 @@ Don't wait for your users to find prompt injections. Use the built-in Adversaria
69
84
  ### 🏛️ Arch Review & Framework Detection
70
85
  Every agent in the cockpit is graded against a framework-aware checklist. The Cockpit intelligently detects your stack—**Google ADK**, **OpenAI Agentkit**, **Anthropic Claude**, **Microsoft AutoGen/Semantic Kernel**, **AWS Bedrock Agents**, or **CopilotKit**—and runs a tailored audit against corresponding production standards. Use `make arch-review` to verify your **Governance-as-Code**.
71
86
 
87
+ ### 🕹️ MCP Connectivity Hub (Model Context Protocol)
88
+ Stop building one-off tool integrations. The Cockpit provides a unified hub for **MCP Servers**. Connect to Google Search, Slack, or your internal databases via the standardized Model Context Protocol for secure, audited tool execution.
89
+
72
90
  ### 🧗 Quality Hill Climbing (ADK Evaluation)
73
91
  Following **Google ADK Evaluation** best practices, the Cockpit provides an iterative optimization loop. `make quality-baseline` runs your agent against a "Golden Dataset" using **LLM-as-a-Judge** scoring (Response Match & Tool Trajectory), climbing the quality curve until production-grade fidelity is reached.
74
92
 
@@ -76,17 +94,25 @@ Following **Google ADK Evaluation** best practices, the Cockpit provides an iter
76
94
 
77
95
  ## ⌨️ Quick Start
78
96
 
79
- You don't even need to clone the repo to start auditing.
97
+ The Cockpit is available as a first-class CLI on PyPI.
80
98
 
81
99
  ```bash
82
- # 1. Audit your existing agent design
83
- uvx agent-ops-cockpit arch-review
100
+ # 1. Install the Cockpit globally
101
+ pip install agentops-cockpit
84
102
 
85
- # 2. Stress test your endpoint
86
- uvx agent-ops-cockpit load-test --requests 100 --concurrency 10
103
+ # 2. Audit your existing agent design
104
+ agent-ops arch-review
87
105
 
88
- # 3. Scaffold a new Well-Architected app
89
- uvx agent-ops-cockpit create my-agent --ui a2ui
106
+ # 3. Stress test your endpoint
107
+ agent-ops load-test --requests 100 --concurrency 10
108
+
109
+ # 4. Scaffold a new Well-Architected app
110
+ agent-ops create my-agent --ui a2ui
111
+ ```
112
+
113
+ You can also use `uvx` for one-off commands without installation:
114
+ ```bash
115
+ uvx agentops-cockpit arch-review
90
116
  ```
91
117
 
92
118
  ---
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "agentops-cockpit"
7
- version = "0.2.2"
7
+ version = "0.4.0"
8
8
  description = "Production-grade Agent Operations (AgentOps) Platform"
9
9
  readme = "README.md"
10
10
  authors = [
@@ -20,6 +20,7 @@ dependencies = [
20
20
  "typer>=0.9.0",
21
21
  "rich>=13.0.0",
22
22
  "GitPython>=3.1.0",
23
+ "mcp>=0.1.0",
23
24
  ]
24
25
 
25
26
  [project.urls]
@@ -6,6 +6,15 @@ from rich.console import Console
6
6
  from rich.panel import Panel
7
7
  import typer
8
8
 
9
+ # Deep imports for portable CLI execution
10
+ from agent_ops_cockpit.ops import arch_review as arch_mod
11
+ from agent_ops_cockpit.ops import orchestrator as orch_mod
12
+ from agent_ops_cockpit.ops import reliability as rel_mod
13
+ from agent_ops_cockpit.eval import quality_climber as quality_mod
14
+ from agent_ops_cockpit.eval import red_team as red_mod
15
+ from agent_ops_cockpit.eval import load_test as load_mod
16
+ from agent_ops_cockpit import optimizer as opt_mod
17
+
9
18
  app = typer.Typer(help="AgentOps Cockpit: The AI Agent Operations Platform", no_args_is_help=True)
10
19
  console = Console()
11
20
 
@@ -14,7 +23,7 @@ REPO_URL = "https://github.com/enriquekalven/agent-ui-starter-pack"
14
23
  @app.command()
15
24
  def version():
16
25
  """Show the version of the Optimized Agent Stack CLI."""
17
- console.print("[bold cyan]agent-ops CLI v0.1.0[/bold cyan]")
26
+ console.print("[bold cyan]agent-ops CLI v0.2.2[/bold cyan]")
18
27
 
19
28
  @app.command()
20
29
  def reliability():
@@ -22,7 +31,7 @@ def reliability():
22
31
  Run reliability audit (Unit Tests + Regression Suite coverage).
23
32
  """
24
33
  console.print("🛡️ [bold green]Launching Reliability Audit...[/bold green]")
25
- subprocess.run([sys.executable, "-m", "backend.ops.reliability", "audit"], env={**os.environ, "PYTHONPATH": "src"})
34
+ rel_mod.run_tests()
26
35
 
27
36
  @app.command()
28
37
  def report():
@@ -30,34 +39,34 @@ def report():
30
39
  Launch full AgentOps audit (Arch, Quality, Security, Cost) and generate a final report.
31
40
  """
32
41
  console.print("🕹️ [bold blue]Launching Full System Audit...[/bold blue]")
33
- subprocess.run([sys.executable, "-m", "backend.ops.orchestrator"], env={**os.environ, "PYTHONPATH": "src"})
42
+ orch_mod.run_full_audit()
34
43
 
35
44
  @app.command()
36
- def quality_baseline():
45
+ def quality_baseline(path: str = "."):
37
46
  """
38
47
  Run iterative 'Hill Climbing' quality audit against a golden dataset.
39
48
  """
40
49
  console.print("🧗 [bold cyan]Launching Quality Hill Climber...[/bold cyan]")
41
- subprocess.run([sys.executable, "-m", "backend.eval.quality_climber", "audit"], env={**os.environ, "PYTHONPATH": "src"})
50
+ quality_mod.audit(path)
42
51
 
43
52
  @app.command()
44
- def arch_review():
53
+ def arch_review(path: str = "."):
45
54
  """
46
55
  Audit agent design against Google Well-Architected Framework.
47
56
  """
48
57
  console.print("🏛️ [bold blue]Launching Architecture Design Review...[/bold blue]")
49
- subprocess.run([sys.executable, "-m", "backend.ops.arch_review", "audit"], env={**os.environ, "PYTHONPATH": "src"})
58
+ arch_mod.audit(path)
50
59
 
51
60
  @app.command()
52
61
  def audit(
53
62
  file_path: str = typer.Argument("src/backend/agent.py", help="Path to the agent code to audit"),
63
+ interactive: bool = typer.Option(True, "--interactive/--no-interactive", "-i", help="Run in interactive mode")
54
64
  ):
55
65
  """
56
66
  Run the Interactive Agent Optimizer audit.
57
67
  """
58
68
  console.print("🔍 [bold blue]Running Agent Operations Audit...[/bold blue]")
59
- # Run the optimizer module
60
- subprocess.run([sys.executable, "-m", "backend.optimizer", "audit", file_path], env={**os.environ, "PYTHONPATH": "src"})
69
+ opt_mod.audit(file_path, interactive)
61
70
 
62
71
  @app.command()
63
72
  def red_team(
@@ -67,7 +76,7 @@ def red_team(
67
76
  Run the Red Team adversarial security evaluation.
68
77
  """
69
78
  console.print("🚩 [bold red]Launching Red Team Evaluation...[/bold red]")
70
- subprocess.run([sys.executable, "-m", "backend.eval.red_team", "audit", agent_path], env={**os.environ, "PYTHONPATH": "src"})
79
+ red_mod.audit(agent_path)
71
80
 
72
81
  @app.command()
73
82
  def load_test(
@@ -79,12 +88,7 @@ def load_test(
79
88
  Stress test agent endpoints for performance and reliability.
80
89
  """
81
90
  console.print("⚡ [bold yellow]Launching Base Load Test...[/bold yellow]")
82
- subprocess.run([
83
- sys.executable, "-m", "backend.eval.load_test", "run",
84
- "--url", url,
85
- "--requests", str(requests),
86
- "--concurrency", str(concurrency)
87
- ], env={**os.environ, "PYTHONPATH": "src"})
91
+ load_mod.run(url, requests, concurrency)
88
92
 
89
93
  @app.command()
90
94
  def deploy(
@@ -98,7 +102,7 @@ def deploy(
98
102
 
99
103
  # 1. Audit
100
104
  console.print("\n[bold]Step 1: Code Optimization Audit[/bold]")
101
- subprocess.run([sys.executable, "-m", "backend.optimizer", "audit", "--no-interactive"], env={**os.environ, "PYTHONPATH": "src"})
105
+ opt_mod.audit("src/backend/agent.py", interactive=False)
102
106
 
103
107
  # 2. Build Frontend
104
108
  console.print("\n[bold]Step 2: Building Frontend Assets[/bold]")
@@ -163,7 +167,7 @@ def create(
163
167
  f"[bold]Quick Start:[/bold]\n"
164
168
  f" 1. [dim]cd[/dim] {project_name}\n"
165
169
  f" 2. [dim]{'npm install' if ui != 'flutter' else 'flutter pub get'}[/dim]\n"
166
- f" 3. [dim]uvx agent-ops-cockpit audit[/dim]\n"
170
+ f" 3. [dim]agent-ops audit[/dim]\n"
167
171
  f" 4. [dim]{start_cmd}[/dim]\n\n"
168
172
  f"Configuration: UI=[bold cyan]{ui}[/bold cyan], CopilotKit=[bold cyan]{'Enabled' if copilotkit else 'Disabled'}[/bold cyan]",
169
173
  title="[bold green]Project Scaffolding Complete[/bold green]",
@@ -0,0 +1,100 @@
1
+ import typer
2
+ import os
3
+ from rich.console import Console
4
+ from rich.table import Table
5
+ from rich.panel import Panel
6
+
7
+ app = typer.Typer(help="Agent Architecture Reviewer: Audit your design against Google Well-Architected Framework.")
8
+ console = Console()
9
+
10
+ from agent_ops_cockpit.ops.frameworks import detect_framework, FRAMEWORKS
11
+
12
+ @app.command()
13
+ def audit(path: str = "."):
14
+ """
15
+ Run the Architecture Design Review based on detected framework.
16
+ """
17
+ framework_key = detect_framework(path)
18
+ framework_data = FRAMEWORKS[framework_key]
19
+ checklist = framework_data["checklist"]
20
+ framework_name = framework_data["name"]
21
+
22
+ console.print(Panel.fit(f"🏛️ [bold blue]{framework_name.upper()}: ARCHITECTURE REVIEW[/bold blue]", border_style="blue"))
23
+ console.print(f"Detected Framework: [bold green]{framework_name}[/bold green]")
24
+ console.print(f"Comparing local agent implementation against [bold]{framework_name} Best Practices[/bold]...\n")
25
+
26
+ # Read all relevant code files for inspection
27
+ code_content = ""
28
+ for root, dirs, files in os.walk(path):
29
+ if any(d in root for d in [".venv", "node_modules", ".git"]): continue
30
+ for file in files:
31
+ if file.endswith((".py", ".ts", ".tsx", ".js")):
32
+ try:
33
+ with open(os.path.join(root, file), 'r') as f:
34
+ code_content += f.read() + "\n"
35
+ except Exception:
36
+ pass
37
+
38
+ total_checks = 0
39
+ passed_checks = 0
40
+
41
+ for section in checklist:
42
+ table = Table(title=section["category"], show_header=True, header_style="bold magenta")
43
+ table.add_column("Design Check", style="cyan")
44
+ table.add_column("Status", style="green", justify="center")
45
+ table.add_column("Rationale", style="dim")
46
+
47
+ for check_text, rationale in section["checks"]:
48
+ total_checks += 1
49
+ # Simple heuristic audit: check if certain keywords exist in the code
50
+ keywords = {
51
+ "PII": ["scrub", "mask", "pii", "filter"],
52
+ "Sandbox": ["sandbox", "docker", "isolated", "gvisor"],
53
+ "Caching": ["cache", "redis", "memorystore", "hive_mind"],
54
+ "Identity": ["iam", "auth", "token", "oauth", "workloadidentity"],
55
+ "Moderation": ["moderate", "safety", "filter"],
56
+ "Routing": ["router", "switch", "map", "agentengine"],
57
+ "Outputs": ["schema", "json", "structured"],
58
+ "HITL": ["approve", "confirm", "human"],
59
+ "Confirmation": ["confirm", "ask", "approve"],
60
+ "Logging": ["log", "trace", "audit", "reasoningengine"],
61
+ "Cloud Run": ["startupcpu", "boost", "minInstances"],
62
+ "GKE": ["kubectl", "k8s", "autopilot", "helm"],
63
+ "VPC": ["vpcnc", "sc-env", "isolation"],
64
+ "A2UI": ["a2ui", "renderer", "registry", "component"],
65
+ "Responsive": ["@media", "max-width", "flex", "grid", "vw", "vh"],
66
+ "Accessibility": ["aria-", "role=", "alt=", "tabindex"],
67
+ "Triggers": ["trigger", "callback", "handle", "onclick"]
68
+ }
69
+
70
+ check_key = check_text.split(":")[0].strip()
71
+ status = "[yellow]PENDING[/yellow]"
72
+
73
+ # If any keyword for this check type is found, mark as PASSED
74
+ matched = False
75
+ for k, words in keywords.items():
76
+ if k.lower() in check_key.lower():
77
+ if any(word in code_content.lower() for word in words):
78
+ matched = True
79
+ break
80
+
81
+ if matched:
82
+ status = "[bold green]PASSED[/bold green]"
83
+ passed_checks += 1
84
+ else:
85
+ status = "[bold red]FAIL[/bold red]"
86
+
87
+ table.add_row(check_text, status, rationale)
88
+
89
+ console.print(table)
90
+ console.print("\n")
91
+
92
+ score = (passed_checks / total_checks) * 100 if total_checks > 0 else 0
93
+ console.print(f"📊 [bold]Review Score: {score:.0f}/100[/bold]")
94
+ if score >= 80:
95
+ console.print(f"✅ [bold green]Architecture Review Complete.[/bold green] Your agent is well-aligned with {framework_name} patterns.")
96
+ else:
97
+ console.print("⚠️ [bold yellow]Review Complete with warnings.[/bold yellow] Your agent has gaps in best practices. See results above.")
98
+
99
+ if __name__ == "__main__":
100
+ app()
@@ -0,0 +1,80 @@
1
+ from typing import List, Dict, Any, Optional
2
+ import asyncio
3
+ import json
4
+ import os
5
+ from mcp import ClientSession, StdioServerParameters
6
+ from mcp.client.stdio import stdio_client
7
+
8
class MCPHub:
    """
    Model Context Protocol (MCP) Hub.

    Provides a unified interface for tool discovery and execution across
    multiple MCP servers (Google Search, SQL, internal tools).

    Instance attributes:
        servers: launch parameters for locally registered stdio MCP servers,
            keyed by server name (filled by ``register_server``).
        registry: static routing table mapping a tool name to its config.
            Entries of type ``"mcp"`` name the server that hosts the tool;
            the single ``"rest"`` entry is a legacy tool flagged deprecated.
    """

    def __init__(self) -> None:
        self.servers: Dict[str, StdioServerParameters] = {}
        self.registry: Dict[str, Dict[str, str]] = {
            "search": {"type": "mcp", "provider": "google-search", "server": "google-search-mcp"},
            "db": {"type": "mcp", "provider": "alloydb", "server": "postgres-mcp"},
            "legacy_crm": {"type": "rest", "provider": "internal", "status": "deprecated"},
        }

    def register_server(self, name: str, command: str, args: Optional[List[str]] = None) -> None:
        """
        Registers a local MCP server.

        Args:
            name: Key under which the server is stored; it must match the
                ``"server"`` value of a registry entry for ``execute_tool``
                to route real calls to it.
            command: Executable used to launch the server process.
            args: Command-line arguments for ``command``. ``None`` (the
                default) or an empty list means no arguments.
        """
        self.servers[name] = StdioServerParameters(
            command=command,
            # Copy so later mutation of the caller's list cannot leak in.
            args=list(args) if args else [],
            # Snapshot of the current process environment at registration time.
            env=os.environ.copy(),
        )

    async def execute_tool(self, tool_name: str, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Executes a tool call using the Model Context Protocol.

        Routing, in order:
          1. Unknown tool name -> ``ValueError``.
          2. Registry type ``"rest"`` -> simulated legacy execution.
          3. MCP tool whose server has not been registered -> simulated
             MCP execution.
          4. Otherwise a stdio client and session are opened for this call,
             the session is initialized, and the tool is invoked.

        Args:
            tool_name: Key into ``self.registry``; also passed as the tool
                name to the MCP session.
            arguments: Arguments forwarded to the tool.

        Returns:
            A dict with at least ``"result"`` and ``"protocol"`` keys. Real
            MCP calls also include ``"server"``.

        Raises:
            ValueError: If ``tool_name`` is not in the registry.
        """
        if tool_name not in self.registry:
            raise ValueError(f"Tool {tool_name} not found in MCP registry.")

        config = self.registry[tool_name]

        # If it's a legacy tool, handle it separately
        if config["type"] == "rest":
            print(f"⚠️ Executing legacy REST tool: {tool_name}")
            return await self._mock_legacy_exec(tool_name, arguments)

        server_name = config.get("server")
        if not server_name or server_name not in self.servers:
            # Fallback to mock for demo/unconfigured environments
            print(f"ℹ️ MCP Server '{server_name}' not configured. Running in simulated mode.")
            return await self._mock_mcp_exec(tool_name, arguments)

        # Real MCP Protocol Execution: a fresh stdio client and session are
        # opened for every call and closed when the ``async with`` exits.
        async with stdio_client(self.servers[server_name]) as (read, write):
            async with ClientSession(read, write) as session:
                await session.initialize()
                result = await session.call_tool(tool_name, arguments)
                return {
                    "result": result.content,
                    "protocol": "mcp-v1",
                    "server": server_name,
                }

    async def _mock_mcp_exec(self, tool_name: str, args: Dict[str, Any]) -> Dict[str, Any]:
        """Return a canned MCP-style response after a 0.2 s delay (``args`` is unused)."""
        await asyncio.sleep(0.2)
        return {
            "result": f"Simulated MCP response for {tool_name}",
            "protocol": "mcp-virtual",
            "assurance": 0.95,
        }

    async def _mock_legacy_exec(self, tool_name: str, args: Dict[str, Any]) -> Dict[str, Any]:
        """Return a canned legacy-REST response after a 0.5 s delay (``args`` is unused)."""
        await asyncio.sleep(0.5)
        return {
            "result": f"Legacy response for {tool_name}",
            "protocol": "rest-legacy",
            "warning": "MIGRATE_TO_MCP",
        }
77
+
78
# Module-level hub instance, created once at import time. No servers are
# registered on it here, so MCP tools run in simulated mode until
# register_server() is called.
global_mcp_hub = MCPHub()
# Example registration (commented out as it requires local binaries)
# global_mcp_hub.register_server("google-search-mcp", "npx", ["-y", "@modelcontextprotocol/server-google-search"])
@@ -0,0 +1,103 @@
1
+ import os
2
+ from datetime import datetime
3
+ from rich.console import Console
4
+ from rich.panel import Panel
5
+ from rich.table import Table
6
+
7
+ # Import from package namespace
8
+ from agent_ops_cockpit.ops import arch_review, reliability, secret_scanner, ui_auditor
9
+ from agent_ops_cockpit.eval import quality_climber, red_team
10
+ from agent_ops_cockpit import optimizer
11
+
12
# Module-level rich console used for all output in this module.
console = Console()
13
+
14
class CockpitOrchestrator:
    """
    Main orchestrator for AgentOps audits.
    Runs Arch Review, Quality Baseline, Red Team, and Performance tests.

    Instance attributes:
        timestamp: Creation time formatted as ``YYYY-MM-DD HH:MM:SS``
            (local, naive).
        report_path: File the Markdown report is written to.
        results: Maps each step name to ``{"success": bool, "output": str}``
            in the order the steps were run.
    """

    def __init__(self):
        self.timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        self.report_path = "cockpit_final_report.md"
        self.results = {}

    def run_step(self, name: str, func, *args, **kwargs):
        """
        Run one audit callable and record whether it raised.

        Args:
            name: Display name of the step; also the key in ``self.results``.
            func: Callable to execute. Its return value is discarded.
            *args: Positional arguments forwarded to ``func``.
            **kwargs: Keyword arguments forwarded to ``func``.

        Any ``Exception`` raised by ``func`` is caught, reported on the
        console, and stored as a failed result so later steps still run.
        """
        console.print(f"\n🚀 [bold]Step: {name}[/bold]")
        try:
            # Capturing output for internal functions is harder without redirecting stdout
            # For now, we just execute them for the effect and note success
            func(*args, **kwargs)
            self.results[name] = {
                "success": True,
                "output": f"Audit {name} executed successfully.",
            }
            console.print(f"✅ {name} Completed.")
        except Exception as e:
            # Deliberate catch-all: one failing audit must not abort the run.
            self.results[name] = {"success": False, "output": str(e)}
            console.print(f"❌ {name} Failed: {e}")

    def generate_report(self):
        """
        Print a summary table and write the Markdown report to ``report_path``.

        The overall status is ``PASS`` only when every recorded step
        succeeded (it is also ``PASS`` when no steps were recorded).
        """
        report = [
            "# 🏁 AgentOps Cockpit: Final Audit Report",
            f"**Timestamp**: {self.timestamp}",
            f"**Status**: {'PASS' if all(r['success'] for r in self.results.values()) else 'FAIL'}",
            "\n---",
            "\n## 📊 Executive Summary",
        ]

        summary_table = Table(show_header=True, header_style="bold magenta")
        summary_table.add_column("Audit Type")
        summary_table.add_column("Status")

        for name, data in self.results.items():
            status = "✅ PASS" if data["success"] else "❌ FAIL"
            summary_table.add_row(name, status)
            report.append(f"- **{name}**: {status}")

        console.print("\n", summary_table)

        report.append("\n## 🔍 Detailed Findings")
        for name, data in self.results.items():
            report.append(f"\n### {name}")
            report.append(data["output"])

        report.append("\n---")
        report.append("\n*Generated by the AgentOps Cockpit Orchestrator.*")

        # The report contains emoji; force UTF-8 so the write does not fail
        # on platforms whose default locale encoding cannot represent them.
        with open(self.report_path, "w", encoding="utf-8") as f:
            f.write("\n".join(report))

        console.print(f"\n✨ [bold green]Final Report generated at {self.report_path}[/bold green]")
72
+
73
def run_full_audit():
    """
    Run every cockpit audit in sequence and emit the final report.

    Each audit goes through ``CockpitOrchestrator.run_step`` and the report
    is generated once all steps have been attempted.
    """
    runner = CockpitOrchestrator()
    step = runner.run_step

    banner = Panel.fit(
        "🕹️ [bold blue]AGENTOPS COCKPIT: FULL SYSTEM AUDIT[/bold blue]\nLaunching all governance and optimization modules...",
        border_style="blue",
    )
    console.print(banner)

    # Architecture
    step("Architecture Review", arch_review.audit, path=".")

    # Quality baseline
    step("Quality Baseline", quality_climber.audit, path=".")

    # Security: leaked secrets, then adversarial probing
    step("Secret Scanner (Leak Detection)", secret_scanner.scan, path=".")
    step("Adversarial Security (Red Team)", red_team.audit, agent_path="src/backend/agent.py")

    # Front-end ("Face") audit
    step("UI/UX Quality (Face Auditor)", ui_auditor.audit, path="src")

    # Token optimization, non-interactive
    step("Token Optimization Audit", optimizer.audit, file_path="src/backend/agent.py", interactive=False)

    # Reliability (unit + regression)
    step("Reliability (Unit + Regression)", reliability.run_tests)

    runner.generate_report()
101
+
102
# Script entry point: run the full audit only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    run_full_audit()