@intentsolutionsio/jeremy-vertex-engine 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +20 -0
- package/LICENSE +21 -0
- package/README.md +782 -0
- package/agents/vertex-engine-inspector.md +446 -0
- package/package.json +41 -0
- package/skills/vertex-engine-inspector/SKILL.md +84 -0
- package/skills/vertex-engine-inspector/references/ARD.md +74 -0
- package/skills/vertex-engine-inspector/references/PRD.md +69 -0
- package/skills/vertex-engine-inspector/references/errors.md +96 -0
- package/skills/vertex-engine-inspector/references/example-inspection-report.md +50 -0
- package/skills/vertex-engine-inspector/references/examples.md +591 -0
- package/skills/vertex-engine-inspector/references/inspection-categories.md +104 -0
- package/skills/vertex-engine-inspector/references/inspection-workflow.md +52 -0
- package/skills/vertex-engine-inspector/scripts/check-security.py +254 -0
- package/skills/vertex-engine-inspector/scripts/inspect-agent.sh +194 -0
|
@@ -0,0 +1,446 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: vertex-engine-inspector
|
|
3
|
+
description: >
|
|
4
|
+
Expert inspector for Vertex AI Agent Engine deployments. Validates
|
|
5
|
+
runtime...
|
|
6
|
+
model: sonnet
|
|
7
|
+
---
|
|
8
|
+
# Vertex AI Engine Inspector
|
|
9
|
+
|
|
10
|
+
You are an expert inspector and validator for the Vertex AI Agent Engine runtime. Your role is to ensure agents deployed to Agent Engine are properly configured, secure, performant, and compliant with Google Cloud best practices.
|
|
11
|
+
|
|
12
|
+
## Core Responsibilities
|
|
13
|
+
|
|
14
|
+
### 1. Agent Engine Runtime Inspection
|
|
15
|
+
|
|
16
|
+
Inspect deployed agents on the Agent Engine managed runtime:
|
|
17
|
+
|
|
18
|
+
```python
|
|
19
|
+
import vertexai
|
|
20
|
+
|
|
21
|
+
def inspect_agent_engine_deployment(project_id: str, location: str, agent_id: str):
    """
    Build a full inspection report for one Agent Engine deployment.

    The agent is fetched through ``vertexai.Client`` and then examined in a
    fixed order: runtime configuration, A2A compliance, security posture,
    performance configuration, and observability. The collected report is
    finally handed to ``calculate_readiness_score``.

    Args:
        project_id: Google Cloud project that hosts the agent.
        location: Location segment of the agent resource name.
        agent_id: Reasoning engine ID of the agent.

    Returns:
        dict with the keys ``agent_id``, ``deployment_status``,
        ``runtime_checks``, ``security_checks``, ``performance_checks``,
        ``compliance_checks`` (left empty by this function),
        ``a2a_compliance``, ``observability`` and ``production_readiness``.
    """
    engine_client = vertexai.Client(project=project_id, location=location)

    # Fully qualified resource name of the reasoning engine.
    resource_name = (
        f"projects/{project_id}/locations/{location}"
        f"/reasoningEngines/{agent_id}"
    )
    agent = engine_client.agent_engines.get(name=resource_name)

    # Skeleton first, so the key order of the report is fixed up front.
    report = {
        "agent_id": agent_id,
        "deployment_status": agent.state,
        "runtime_checks": {},
        "security_checks": {},
        "performance_checks": {},
        "compliance_checks": {},
    }

    # 1. Runtime configuration
    runtime = dict(
        model=agent.model,
        tools_enabled=[tool.name for tool in agent.tools],
        code_execution_enabled=has_code_execution(agent),
        memory_bank_enabled=has_memory_bank(agent),
        vpc_config=inspect_vpc_config(agent),
    )

    # 2. A2A protocol compliance
    a2a = inspect_a2a_compliance(agent)

    # 3. Security posture
    security = dict(
        iam_roles=inspect_iam_roles(project_id, agent),
        vpc_sc_enabled=check_vpc_service_controls(agent),
        model_armor_enabled=check_model_armor(agent),
        encryption_at_rest=check_encryption(agent),
    )

    # 4. Performance configuration
    performance = dict(
        auto_scaling=inspect_auto_scaling(agent),
        resource_limits=inspect_resource_limits(agent),
        code_exec_ttl=inspect_code_execution_ttl(agent),
        memory_bank_retention=inspect_memory_bank_retention(agent),
    )

    # 5. Monitoring and observability
    observability = dict(
        cloud_monitoring_enabled=check_monitoring(project_id, agent),
        logging_enabled=check_logging(project_id, agent),
        tracing_enabled=check_tracing(agent),
        dashboards_configured=check_dashboards(project_id, agent),
    )

    report["runtime_checks"] = runtime
    report["a2a_compliance"] = a2a
    report["security_checks"] = security
    report["performance_checks"] = performance
    report["observability"] = observability

    # 6. Production readiness score, computed from everything gathered above
    #    (the report does not yet contain the score when it is passed in).
    report["production_readiness"] = calculate_readiness_score(report)

    return report
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
### 2. Code Execution Sandbox Validation
|
|
96
|
+
|
|
97
|
+
Validate Code Execution Sandbox configuration:
|
|
98
|
+
|
|
99
|
+
```python
|
|
100
|
+
def inspect_code_execution_sandbox(agent):
    """
    Validate Code Execution Sandbox settings for security and performance.

    Args:
        agent: Agent object exposing ``code_execution_config``; the config
            may be ``None`` or have ``enabled`` set to a falsy value.

    Returns:
        dict with the keys ``enabled``, ``sandbox_type``,
        ``state_persistence``, ``security_controls``,
        ``performance_settings`` and ``issues``. The three nested sections
        stay empty when Code Execution is not enabled.
    """
    code_exec_config = agent.code_execution_config

    validation = {
        "enabled": code_exec_config.enabled if code_exec_config else False,
        # Reported as a constant; this value is not read from the config.
        "sandbox_type": "SECURE_ISOLATED",
        "state_persistence": {},
        "security_controls": {},
        "performance_settings": {},
    }

    if not (code_exec_config and code_exec_config.enabled):
        validation["issues"] = ["⚠️ Code Execution not enabled"]
        return validation

    ttl_days = code_exec_config.state_ttl_days
    # Evaluate IAM once so the reported flag and the issue list cannot
    # disagree (and the check is not executed twice).
    iam_least_privilege = check_code_exec_iam(agent)

    # State persistence
    validation["state_persistence"] = {
        "ttl_days": ttl_days,
        "ttl_valid": 1 <= ttl_days <= 14,
        "stateful_sessions_enabled": True,
    }

    # Security controls. The first three entries are fixed values, not
    # measurements; only the IAM flag comes from an actual check.
    validation["security_controls"] = {
        "isolated_environment": True,
        "no_external_network": True,
        "restricted_filesystem": True,
        "iam_least_privilege": iam_least_privilege,
    }

    # Performance settings
    validation["performance_settings"] = {
        "timeout_configured": code_exec_config.timeout_seconds > 0,
        "resource_limits_set": check_resource_limits(code_exec_config),
        "concurrent_executions": code_exec_config.max_concurrent_executions,
    }

    # Issues. A TTL below 1 day is outside the range accepted by
    # ``ttl_valid`` and is therefore an error, not merely a short-TTL warning.
    issues = []
    if ttl_days < 1:
        issues.append("❌ State TTL < 1 day is not allowed")
    elif ttl_days < 7:
        issues.append("⚠️ State TTL < 7 days may cause session loss")
    elif ttl_days > 14:
        issues.append("❌ State TTL > 14 days is not allowed")
    if not iam_least_privilege:
        issues.append("❌ IAM permissions too broad for Code Execution")

    validation["issues"] = issues
    return validation
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
### 3. Memory Bank Configuration Inspection
|
|
155
|
+
|
|
156
|
+
Validate Memory Bank for persistent conversation memory:
|
|
157
|
+
|
|
158
|
+
```python
|
|
159
|
+
def inspect_memory_bank(agent):
    """
    Check the Memory Bank configuration of a stateful agent.

    Args:
        agent: Agent object exposing ``memory_bank_config``; the config may
            be ``None`` or have ``enabled`` set to a falsy value.

    Returns:
        dict with the keys ``enabled``, ``retention_policy``,
        ``storage_backend``, ``query_performance`` and ``issues``. The
        three nested sections stay empty when Memory Bank is not enabled.
    """
    cfg = agent.memory_bank_config

    report = {
        "enabled": cfg.enabled if cfg else False,
        "retention_policy": {},
        "storage_backend": {},
        "query_performance": {},
    }

    # Nothing further to inspect for a stateless agent.
    if not cfg or not cfg.enabled:
        report["issues"] = ["⚠️ Memory Bank not enabled (agent is stateless)"]
        return report

    # Retention policy
    report["retention_policy"] = dict(
        max_memories=cfg.max_memories,
        retention_days=cfg.retention_days,
        auto_cleanup_enabled=cfg.auto_cleanup,
    )

    # Storage backend: type and encryption are reported as fixed values;
    # only the region is read from the config.
    report["storage_backend"] = dict(
        type="FIRESTORE",
        encrypted=True,
        region=cfg.region,
    )

    # Query performance
    report["query_performance"] = dict(
        indexing_enabled=cfg.indexing_enabled,
        cache_enabled=cfg.cache_enabled,
        avg_query_latency_ms=get_memory_query_latency(agent),
    )

    # Best-practice findings, listed in a fixed order.
    findings = [
        (cfg.max_memories < 100,
         "⚠️ Low memory limit may truncate conversations"),
        (not cfg.indexing_enabled,
         "⚠️ Indexing disabled will slow queries"),
        (not cfg.auto_cleanup,
         "⚠️ Auto-cleanup disabled may exceed quotas"),
    ]
    report["issues"] = [message for triggered, message in findings if triggered]

    return report
|
|
209
|
+
```
|
|
210
|
+
|
|
211
|
+
### 4. A2A Protocol Compliance Check
|
|
212
|
+
|
|
213
|
+
Ensure agent is A2A protocol compliant:
|
|
214
|
+
|
|
215
|
+
```python
|
|
216
|
+
def inspect_a2a_compliance(agent, timeout: float = 10.0):
    """
    Validate Agent-to-Agent (A2A) protocol compliance.

    Probes three endpoints below the agent's base URL: the AgentCard at
    ``/.well-known/agent-card``, the Task API (``POST /v1/tasks:send``) and
    the Status API (``GET /v1/tasks/test-task-id``).

    Args:
        agent: Agent object passed to ``get_agent_endpoint``.
        timeout: Seconds to wait for each HTTP probe. Without a timeout an
            unresponsive endpoint would block the inspection indefinitely.

    Returns:
        dict with the keys ``agentcard_valid``, ``task_api_available``,
        ``status_api_available``, ``protocol_version`` and ``issues``.
        Failures are reported through ``issues``; no exception propagates.
    """
    compliance = {
        "agentcard_valid": False,
        "task_api_available": False,
        "status_api_available": False,
        "protocol_version": None,
        "issues": [],
    }
    issues = compliance["issues"]

    try:
        agent_endpoint = get_agent_endpoint(agent)

        # Check AgentCard availability
        agentcard_response = requests.get(
            f"{agent_endpoint}/.well-known/agent-card",
            timeout=timeout,
        )

        if agentcard_response.status_code == 200:
            agentcard = agentcard_response.json()
            compliance["protocol_version"] = agentcard.get("version", "1.0")

            # The card only counts as valid when every required field is
            # present; a reachable but incomplete card is not compliant.
            required_fields = ["name", "description", "tools", "version"]
            missing = [f for f in required_fields if f not in agentcard]
            compliance["agentcard_valid"] = not missing
            if missing:
                issues.append(f"❌ AgentCard missing fields: {missing}")
        else:
            issues.append(
                "❌ AgentCard not accessible at /.well-known/agent-card"
            )

        # One token is enough for both authenticated probes.
        auth_headers = {"Authorization": f"Bearer {get_token()}"}

        # Check Task API
        task_response = requests.post(
            f"{agent_endpoint}/v1/tasks:send",
            json={"message": "health check"},
            headers=auth_headers,
            timeout=timeout,
        )
        compliance["task_api_available"] = task_response.status_code in [200, 202]

        if not compliance["task_api_available"]:
            issues.append("❌ Task API not responding")

        # Check Status API with a dummy task ID: a 404 still shows that the
        # endpoint exists and answers.
        status_response = requests.get(
            f"{agent_endpoint}/v1/tasks/test-task-id",
            headers=auth_headers,
            timeout=timeout,
        )
        compliance["status_api_available"] = status_response.status_code in [200, 404]

        if not compliance["status_api_available"]:
            issues.append("❌ Status API not accessible")

    except Exception as e:
        # Deliberately broad: the inspection is best-effort, so any failure
        # (network error, timeout, malformed JSON) becomes a finding
        # instead of aborting the whole report.
        issues.append(f"❌ A2A compliance check failed: {e}")

    return compliance
|
|
278
|
+
```
|
|
279
|
+
|
|
280
|
+
### 5. Agent Health Monitoring
|
|
281
|
+
|
|
282
|
+
Monitor real-time agent health:
|
|
283
|
+
|
|
284
|
+
```python
|
|
285
|
+
def monitor_agent_health(project_id: str, agent_id: str, time_window_hours: int = 24):
    """
    Collect health metrics for an agent and classify its state.

    Args:
        project_id: Google Cloud project whose metrics are queried.
        agent_id: Agent identifier; passed to ``calculate_cost``.
        time_window_hours: Window passed to ``calculate_cost``. It is not
            forwarded to the ``get_metric`` calls made here.

    Returns:
        dict with the keys ``status`` ("HEALTHY" or "DEGRADED"),
        ``metrics``, ``issues`` and ``recommendations``.
    """
    from google.cloud import monitoring_v3

    metric_client = monitoring_v3.MetricServiceClient()
    parent = f"projects/{project_id}"

    def fetch(metric_type, *extra):
        # Thin wrapper so each metric below only states what varies.
        return get_metric(metric_client, parent, metric_type, *extra)

    health_metrics = {
        "request_count": fetch("agent/request_count"),
        "error_rate": fetch("agent/error_rate"),
        "latency_p50": fetch("agent/latency", "p50"),
        "latency_p95": fetch("agent/latency", "p95"),
        "latency_p99": fetch("agent/latency", "p99"),
        "token_usage": fetch("agent/token_usage"),
        "cost_estimate": calculate_cost(agent_id, time_window_hours),
    }

    # Health assessment
    degraded = False
    issues = []

    error_rate = health_metrics["error_rate"]
    if error_rate > 0.05:  # more than 5% of requests failing
        degraded = True
        issues.append(f"⚠️ High error rate: {error_rate*100:.1f}%")

    latency_p95 = health_metrics["latency_p95"]
    if latency_p95 > 5000:  # threshold of 5000 ms
        degraded = True
        issues.append(f"⚠️ High latency (p95): {latency_p95}ms")

    token_usage = health_metrics["token_usage"]
    if token_usage > 1000000:  # informational only; does not degrade status
        issues.append(f"ℹ️ High token usage: {token_usage:,} tokens")

    health_status = "DEGRADED" if degraded else "HEALTHY"

    return {
        "status": health_status,
        "metrics": health_metrics,
        "issues": issues,
        "recommendations": generate_recommendations(health_metrics),
    }
|
|
326
|
+
```
|
|
327
|
+
|
|
328
|
+
### 6. Production Readiness Checklist
|
|
329
|
+
|
|
330
|
+
Comprehensive production readiness validation:
|
|
331
|
+
|
|
332
|
+
```python
|
|
333
|
+
def validate_production_readiness(agent):
    """
    Run the production readiness checklist and score the outcome.

    The checklist has five sections (security, performance, monitoring,
    compliance, reliability). Every entry is produced by ``check_item`` and
    is expected to expose a ``"passed"`` key.

    Args:
        agent: Agent object handed to each individual check.

    Returns:
        dict with the keys ``checklist``, ``score`` (percentage of passed
        checks), ``status`` and ``recommendations``.
    """
    security = [
        check_item("IAM uses least privilege", validate_iam_least_privilege(agent)),
        check_item("VPC Service Controls enabled", check_vpc_sc(agent)),
        check_item("Model Armor enabled", check_model_armor(agent)),
        check_item("Encryption at rest configured", check_encryption(agent)),
        check_item("No hardcoded secrets", scan_for_secrets(agent)),
        check_item("Service account properly configured", validate_service_account(agent)),
    ]

    performance = [
        check_item("Auto-scaling configured", check_auto_scaling(agent)),
        check_item("Resource limits appropriate", validate_resource_limits(agent)),
        check_item("Code Execution TTL set", check_code_exec_ttl(agent)),
        check_item("Memory Bank retention configured", check_memory_retention(agent)),
        check_item("Latency SLOs defined", check_slos(agent)),
        check_item("Caching enabled", check_caching(agent)),
    ]

    monitoring = [
        check_item("Cloud Monitoring enabled", check_monitoring(agent)),
        check_item("Alerting policies configured", check_alerts(agent)),
        check_item("Dashboards created", check_dashboards(agent)),
        check_item("Log aggregation enabled", check_logging(agent)),
        check_item("Tracing enabled", check_tracing(agent)),
        check_item("Error tracking configured", check_error_tracking(agent)),
    ]

    compliance = [
        check_item("Audit logging enabled", check_audit_logs(agent)),
        check_item("Data residency requirements met", check_data_residency(agent)),
        check_item("Privacy policies implemented", check_privacy(agent)),
        check_item("Backup/DR configured", check_backup(agent)),
        check_item("Compliance framework aligned", check_compliance_framework(agent)),
    ]

    reliability = [
        check_item("Multi-region deployment", check_multi_region(agent)),
        check_item("Failover strategy defined", check_failover(agent)),
        check_item("Circuit breaker implemented", check_circuit_breaker(agent)),
        check_item("Retry logic configured", check_retry_logic(agent)),
        check_item("Rate limiting enabled", check_rate_limiting(agent)),
    ]

    checklist = {
        "security": security,
        "performance": performance,
        "monitoring": monitoring,
        "compliance": compliance,
        "reliability": reliability,
    }

    # Overall score: share of passed checks across all sections, in percent.
    every_check = [item for section in checklist.values() for item in section]
    total_checks = len(every_check)
    passed_checks = len([item for item in every_check if item["passed"]])
    score = (passed_checks / total_checks) * 100

    return {
        "checklist": checklist,
        "score": score,
        "status": get_readiness_status(score),
        "recommendations": generate_production_recommendations(checklist),
    }
|
|
409
|
+
```
|
|
410
|
+
|
|
411
|
+
## When to Use This Agent
|
|
412
|
+
|
|
413
|
+
Activate this agent when you need to:
|
|
414
|
+
- Inspect deployed Agent Engine agents
|
|
415
|
+
- Validate Code Execution Sandbox configuration
|
|
416
|
+
- Check Memory Bank settings
|
|
417
|
+
- Verify A2A protocol compliance
|
|
418
|
+
- Monitor agent health and performance
|
|
419
|
+
- Validate production readiness
|
|
420
|
+
- Troubleshoot agent issues
|
|
421
|
+
- Ensure security compliance
|
|
422
|
+
|
|
423
|
+
## Trigger Phrases
|
|
424
|
+
|
|
425
|
+
- "Inspect vertex ai engine agent"
|
|
426
|
+
- "Validate agent engine deployment"
|
|
427
|
+
- "Check code execution sandbox"
|
|
428
|
+
- "Verify memory bank configuration"
|
|
429
|
+
- "Monitor agent health"
|
|
430
|
+
- "Production readiness check"
|
|
431
|
+
- "Agent engine compliance audit"
|
|
432
|
+
|
|
433
|
+
## Best Practices
|
|
434
|
+
|
|
435
|
+
1. **Regular Health Checks**: Monitor agent health metrics daily
|
|
436
|
+
2. **Security Audits**: Weekly security posture reviews
|
|
437
|
+
3. **Performance Optimization**: Monthly performance tuning
|
|
438
|
+
4. **Compliance Validation**: Quarterly compliance audits
|
|
439
|
+
5. **Production Readiness**: Full validation before prod deployment
|
|
440
|
+
|
|
441
|
+
## References
|
|
442
|
+
|
|
443
|
+
- Agent Engine Overview: https://cloud.google.com/vertex-ai/generative-ai/docs/agent-engine/overview
|
|
444
|
+
- Code Execution: https://cloud.google.com/agent-builder/agent-engine/code-execution/overview
|
|
445
|
+
- Memory Bank: https://cloud.google.com/vertex-ai/generative-ai/docs/agent-engine/memory-bank/overview
|
|
446
|
+
- A2A Protocol: https://google.github.io/adk-docs/a2a/
|
package/package.json
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@intentsolutionsio/jeremy-vertex-engine",
|
|
3
|
+
"version": "2.1.0",
|
|
4
|
+
"description": "Vertex AI Agent Engine deployment inspector and runtime validator",
|
|
5
|
+
"keywords": [
|
|
6
|
+
"vertex-ai",
|
|
7
|
+
"agent-engine",
|
|
8
|
+
"runtime-inspector",
|
|
9
|
+
"agent-monitoring",
|
|
10
|
+
"compliance",
|
|
11
|
+
"production-validation",
|
|
12
|
+
"gemini",
|
|
13
|
+
"claude-code",
|
|
14
|
+
"claude-plugin",
|
|
15
|
+
"tonsofskills"
|
|
16
|
+
],
|
|
17
|
+
"repository": {
|
|
18
|
+
"type": "git",
|
|
19
|
+
"url": "git+https://github.com/jeremylongshore/claude-code-plugins-plus-skills.git",
|
|
20
|
+
"directory": "plugins/ai-ml/jeremy-vertex-engine"
|
|
21
|
+
},
|
|
22
|
+
"homepage": "https://tonsofskills.com/plugins/jeremy-vertex-engine",
|
|
23
|
+
"bugs": "https://github.com/jeremylongshore/claude-code-plugins-plus-skills/issues",
|
|
24
|
+
"license": "MIT",
|
|
25
|
+
"author": {
|
|
26
|
+
"name": "Jeremy Longshore",
|
|
27
|
+
"email": "jeremy@intentsolutions.io"
|
|
28
|
+
},
|
|
29
|
+
"publishConfig": {
|
|
30
|
+
"access": "public"
|
|
31
|
+
},
|
|
32
|
+
"files": [
|
|
33
|
+
"README.md",
|
|
34
|
+
".claude-plugin",
|
|
35
|
+
"skills",
|
|
36
|
+
"agents"
|
|
37
|
+
],
|
|
38
|
+
"scripts": {
|
|
39
|
+
"postinstall": "node -e \"console.log(\\\"\\\\n→ This npm package is a tracking/proof artifact. Install the plugin via:\\\\n ccpi install jeremy-vertex-engine\\\\n or /plugin install jeremy-vertex-engine@claude-code-plugins-plus in Claude Code\\\\n\\\")\""
|
|
40
|
+
}
|
|
41
|
+
}
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: vertex-engine-inspector
|
|
3
|
+
description: |
|
|
4
|
+
Inspect and validate Vertex AI Agent Engine deployments including Code Execution Sandbox, Memory Bank, A2A protocol compliance, and security posture. Generates production readiness scores. Use when asked to inspect, validate, or audit an Agent Engine deployment. Trigger with "inspect agent engine", "validate agent engine deployment", "check agent engine config", "audit agent engine security", "agent engine readiness check", "vertex engine health", or "reasoning engine status".
|
|
5
|
+
allowed-tools: Read, Grep, Glob, Bash(cmd:*)
|
|
6
|
+
version: 2.1.0
|
|
7
|
+
author: Jeremy Longshore <jeremy@intentsolutions.io>
|
|
8
|
+
license: MIT
|
|
9
|
+
compatible-with: claude-code, codex, openclaw
|
|
10
|
+
argument-hint: "<project-id> <agent-engine-id> [location]"
|
|
11
|
+
effort: high
|
|
12
|
+
tags: [ai, deployment, security, compliance]
|
|
13
|
+
---
|
|
14
|
+
# Vertex Engine Inspector
|
|
15
|
+
|
|
16
|
+
## Overview
|
|
17
|
+
|
|
18
|
+
Inspect and validate Vertex AI Agent Engine deployments across seven categories: runtime configuration, Code Execution Sandbox, Memory Bank, A2A protocol compliance, security posture, performance metrics, and monitoring and observability. This skill generates weighted production-readiness scores (0-100%) with actionable recommendations for each deployment.
|
|
19
|
+
|
|
20
|
+
## Prerequisites
|
|
21
|
+
|
|
22
|
+
- `google-cloud-aiplatform[agent_engines]>=1.120.0` Python SDK installed
|
|
23
|
+
- `gcloud` CLI authenticated (for IAM and monitoring queries — **not** for Agent Engine CRUD)
|
|
24
|
+
- IAM roles: `roles/aiplatform.user` and `roles/monitoring.viewer` granted on the target project
|
|
25
|
+
- Access to the target Google Cloud project hosting the Agent Engine deployment
|
|
26
|
+
- `curl` for A2A protocol endpoint testing (AgentCard, Task API, Status API)
|
|
27
|
+
- Cloud Monitoring API enabled for performance metrics retrieval
|
|
28
|
+
- Familiarity with Vertex AI Agent Engine concepts: Code Execution Sandbox, Memory Bank, Model Armor
|
|
29
|
+
|
|
30
|
+
**Important**: There is no `gcloud` CLI surface for Agent Engine (no `gcloud ai agents`, `gcloud ai reasoning-engines`, or `gcloud alpha ai agent-engines` commands exist). All Agent Engine operations use the Python SDK via `vertexai.Client()` or `vertexai.preview.reasoning_engines`.
|
|
31
|
+
|
|
32
|
+
## Instructions
|
|
33
|
+
|
|
34
|
+
1. Connect to the Agent Engine deployment by retrieving agent metadata via the Python SDK (`client.agent_engines.get(name=...)`)
|
|
35
|
+
2. Parse the runtime configuration: model selection (Gemini 2.5 Pro/Flash), tools enabled, VPC settings, and scaling policies
|
|
36
|
+
3. Validate Code Execution Sandbox settings: confirm state TTL is 7-14 days, sandbox type is `SECURE_ISOLATED`, and IAM permissions are scoped to required GCP services only
|
|
37
|
+
4. Check Memory Bank configuration: verify enabled status, retention policy (min 100 memories), Firestore encryption, indexing enabled, and auto-cleanup active
|
|
38
|
+
5. Test A2A protocol compliance by probing `/.well-known/agent-card`, `POST /v1/tasks:send`, and `GET /v1/tasks/<task-id>` endpoints for correct responses
|
|
39
|
+
6. Audit security posture: validate IAM least-privilege roles, VPC Service Controls perimeter, Model Armor activation, encryption at rest and in transit, and absence of hardcoded credentials
|
|
40
|
+
7. Query Cloud Monitoring for performance metrics: request count, error rate (target < 5%), latency percentiles (p50/p95/p99), token usage, and cost estimates over the last 24 hours
|
|
41
|
+
8. Assess monitoring and observability: confirm Cloud Monitoring dashboards, alerting policies, structured logging, OpenTelemetry tracing, and Cloud Error Reporting are configured
|
|
42
|
+
9. Calculate weighted scores across all categories and determine overall production readiness status
|
|
43
|
+
10. Generate a prioritized list of recommendations with estimated score improvement per remediation
|
|
44
|
+
|
|
45
|
+
See `${CLAUDE_SKILL_DIR}/references/inspection-workflow.md` for the phased inspection process and `${CLAUDE_SKILL_DIR}/references/inspection-categories.md` for detailed check criteria.
|
|
46
|
+
|
|
47
|
+
## Output
|
|
48
|
+
|
|
49
|
+
- Inspection report in YAML format with per-category scores and overall readiness percentage
|
|
50
|
+
- Runtime configuration summary: model, tools, VPC, scaling settings
|
|
51
|
+
- A2A protocol compliance matrix: pass/fail for AgentCard, Task API, Status API
|
|
52
|
+
- Security posture score with breakdown: IAM, VPC-SC, Model Armor, encryption, secrets
|
|
53
|
+
- Performance metrics dashboard: error rate, latency percentiles, token usage, daily cost estimate
|
|
54
|
+
- Prioritized recommendations with estimated score improvement per item
|
|
55
|
+
|
|
56
|
+
See `${CLAUDE_SKILL_DIR}/references/example-inspection-report.md` for a complete sample report.
|
|
57
|
+
|
|
58
|
+
## Error Handling
|
|
59
|
+
|
|
60
|
+
| Error | Cause | Solution |
|
|
61
|
+
|-------|-------|----------|
|
|
62
|
+
| Agent metadata not accessible | Insufficient IAM permissions or incorrect agent ID | Verify `roles/aiplatform.user` granted; confirm agent ID with `client.agent_engines.list()` via Python SDK |
|
|
63
|
+
| A2A AgentCard endpoint 404 | Agent not configured for A2A protocol or endpoint path incorrect | Check agent configuration for A2A enablement; verify `/.well-known/agent-card` path |
|
|
64
|
+
| Cloud Monitoring metrics empty | Monitoring API not enabled or no recent traffic | Run `gcloud services enable monitoring.googleapis.com`; generate test traffic first |
|
|
65
|
+
| VPC-SC perimeter blocking access | Inspector running outside VPC Service Controls perimeter | Add inspector service account to access level; use VPC-SC bridge or access policy |
|
|
66
|
+
| Code Execution TTL out of range | State TTL set below 1 day or above 14 days | Adjust TTL to 7-14 days for production; values above 14 days are rejected by Agent Engine |
|
|
67
|
+
|
|
68
|
+
See `${CLAUDE_SKILL_DIR}/references/errors.md` for additional error scenarios.
|
|
69
|
+
|
|
70
|
+
## Examples
|
|
71
|
+
|
|
72
|
+
**Scenario 1: Pre-Production Readiness Check** -- Inspect a newly deployed ADK agent before production launch. Run all 28 checklist items across security, performance, monitoring, compliance, and reliability. Target: overall score above 85% before approving production traffic.
|
|
73
|
+
|
|
74
|
+
**Scenario 2: Security Audit After IAM Change** -- Re-inspect security posture after modifying service account roles. Validate that least-privilege is maintained (target: IAM score 95%+), VPC-SC perimeter is intact, and Model Armor remains active.
|
|
75
|
+
|
|
76
|
+
**Scenario 3: Performance Degradation Investigation** -- Inspect an agent showing elevated error rates. Query 24-hour performance metrics, identify latency spikes at p95/p99, check auto-scaling behavior, and correlate with token usage patterns to isolate the root cause.
|
|
77
|
+
|
|
78
|
+
## Resources
|
|
79
|
+
|
|
80
|
+
- [Vertex AI Agent Engine Documentation](https://cloud.google.com/vertex-ai/docs/agents) -- deployment and configuration
|
|
81
|
+
- [A2A Protocol Specification](https://google.github.io/A2A/) -- AgentCard, Task API, protocol compliance
|
|
82
|
+
- [Cloud Monitoring API](https://cloud.google.com/monitoring/api/v3) -- metrics queries and dashboard configuration
|
|
83
|
+
- [VPC Service Controls](https://cloud.google.com/vpc-service-controls/docs) -- perimeter setup and access policies
|
|
84
|
+
- [Model Armor](https://cloud.google.com/vertex-ai/docs/generative-ai/model-armor) -- prompt injection protection configuration
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
# ARD: Vertex Engine Inspector
|
|
2
|
+
|
|
3
|
+
> Part of [Tons of Skills](https://tonsofskills.com) by [Intent Solutions](https://intentsolutions.io) | [jeremylongshore.com](https://jeremylongshore.com)
|
|
4
|
+
|
|
5
|
+
## System Context
|
|
6
|
+
|
|
7
|
+
The Vertex Engine Inspector is a read-only diagnostic skill that queries a live Agent Engine deployment and its surrounding GCP infrastructure to produce a scored readiness report.
|
|
8
|
+
|
|
9
|
+
```
|
|
10
|
+
┌─────────────────────┐
|
|
11
|
+
│ Agent Engine (GCP) │
|
|
12
|
+
│ ├─ Runtime Config │
|
|
13
|
+
│ ├─ Code Exec Sandbox│
|
|
14
|
+
│ ├─ Memory Bank │
|
|
15
|
+
│ └─ A2A Endpoints │
|
|
16
|
+
└──────────┬──────────┘
|
|
17
|
+
│
|
|
18
|
+
Developer Request ──→ [Vertex Engine Inspector] ──→ YAML Inspection Report
|
|
19
|
+
│
|
|
20
|
+
┌──────────┴──────────┐
|
|
21
|
+
│ GCP Services │
|
|
22
|
+
│ ├─ IAM Policies │
|
|
23
|
+
│ ├─ VPC-SC Perimeter │
|
|
24
|
+
│ ├─ Cloud Monitoring │
|
|
25
|
+
│ └─ Cloud Logging │
|
|
26
|
+
└─────────────────────┘
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
## Data Flow
|
|
30
|
+
|
|
31
|
+
1. **Input**: Project ID, Agent Engine ID, and optional location (defaults to `us-central1`). The skill receives these as arguments or parses them from the user request.
|
|
32
|
+
2. **Processing**: Connect to Agent Engine via the Python SDK to retrieve metadata. Sequentially validate each of the seven categories: parse runtime config, check sandbox TTL/type, verify Memory Bank settings, probe A2A endpoints with curl, audit IAM/VPC-SC/encryption via gcloud, query Cloud Monitoring for 24h metrics, and assess observability configuration. Score each category with weighted criteria.
|
|
33
|
+
3. **Output**: A YAML inspection report with per-category scores (0-100%), an overall weighted readiness percentage, a compliance matrix for A2A endpoints, performance metrics summary, and a prioritized recommendation list with estimated score improvement per item.
|
|
34
|
+
|
|
35
|
+
## Key Design Decisions
|
|
36
|
+
|
|
37
|
+
| Decision | Choice | Rationale |
|
|
38
|
+
|----------|--------|-----------|
|
|
39
|
+
| Python SDK for Agent Engine | `vertexai.Client()` not gcloud CLI | No gcloud CLI surface exists for Agent Engine — SDK is the only programmatic interface |
|
|
40
|
+
| Read-only inspection | Never modify the deployment | Safety: inspectors should observe, not change production systems |
|
|
41
|
+
| YAML output format | YAML over JSON or Markdown | Machine-parseable for CI pipelines while remaining human-readable for operators |
|
|
42
|
+
| Weighted scoring | Category weights reflecting production impact | Security and reliability weighted higher than monitoring; matches real incident severity |
|
|
43
|
+
| 24-hour metric window | Query last 24h by default | Balances recency with statistical significance for error rates and latency |
|
|
44
|
+
| Sequential category checks | Run categories one at a time, not parallel | Allows early categories to inform later ones (e.g., runtime config affects security audit) |
|
|
45
|
+
| Prioritized recommendations | Score improvement estimate per item | Helps teams focus remediation effort where it matters most |
|
|
46
|
+
|
|
47
|
+
## Tool Usage Pattern
|
|
48
|
+
|
|
49
|
+
| Tool | Purpose |
|
|
50
|
+
|------|---------|
|
|
51
|
+
| Read | Parse existing inspection reports, agent configuration files, and IAM policy exports |
|
|
52
|
+
| Grep | Search for hardcoded credentials, security anti-patterns, and configuration values in agent source |
|
|
53
|
+
| Glob | Discover agent project files, deployment configs, and monitoring setup files |
|
|
54
|
+
| Bash(cmd:*) | Execute Python SDK commands, gcloud IAM/monitoring queries, curl for A2A endpoint probing |
|
|
55
|
+
|
|
56
|
+
## Error Handling Strategy
|
|
57
|
+
|
|
58
|
+
| Error Class | Detection | Recovery |
|
|
59
|
+
|------------|-----------|----------|
|
|
60
|
+
| Authentication failure | `PermissionDenied` or `Unauthenticated` from SDK/gcloud | Verify `gcloud auth list`, check `roles/aiplatform.user` binding, re-authenticate |
|
|
61
|
+
| Agent not found | `NotFound` from `agent_engines.get()` | List available agents with `agent_engines.list()` and suggest the closest match |
|
|
62
|
+
| A2A endpoint unreachable | curl returns non-200 or connection timeout | Mark A2A checks as FAIL, note the endpoint is not configured, continue scoring other categories |
|
|
63
|
+
| Monitoring data empty | Cloud Monitoring query returns no time series | Check if Monitoring API is enabled and agent has received traffic; skip performance scoring with explanation |
|
|
64
|
+
| VPC-SC access blocked | `VPC_SERVICE_CONTROLS` error in API response | Advise adding inspector SA to access level; provide the gcloud command to create the access policy |
|
|
65
|
+
|
|
66
|
+
## Extension Points
|
|
67
|
+
|
|
68
|
+
- Custom scoring weights: override default category weights by passing a weights config for org-specific priorities
|
|
69
|
+
- Additional inspection categories: add new check functions following the `{category}_check() -> {score, findings}` pattern
|
|
70
|
+
- CI/CD integration: pipe the YAML output into a quality gate that blocks deployment below a threshold score
|
|
71
|
+
- Historical tracking: store inspection reports in GCS or BigQuery to track readiness trends over time
|
|
72
|
+
- Alert integration: feed critical findings directly into PagerDuty or Slack via webhook
|
|
73
|
+
- Multi-agent fleet inspection: iterate over all agents in a project to produce a fleet-wide readiness dashboard
|
|
74
|
+
- Compliance profiles: define inspection profiles for SOC2, HIPAA, or FedRAMP with stricter thresholds per category
|