tribunal-kit 1.0.0 → 2.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (125) hide show
  1. package/.agent/.shared/ui-ux-pro-max/README.md +3 -3
  2. package/.agent/ARCHITECTURE.md +205 -10
  3. package/.agent/GEMINI.md +37 -7
  4. package/.agent/agents/accessibility-reviewer.md +134 -0
  5. package/.agent/agents/ai-code-reviewer.md +129 -0
  6. package/.agent/agents/frontend-specialist.md +3 -0
  7. package/.agent/agents/game-developer.md +21 -21
  8. package/.agent/agents/logic-reviewer.md +12 -0
  9. package/.agent/agents/mobile-reviewer.md +79 -0
  10. package/.agent/agents/orchestrator.md +56 -26
  11. package/.agent/agents/performance-reviewer.md +36 -0
  12. package/.agent/agents/supervisor-agent.md +156 -0
  13. package/.agent/agents/swarm-worker-contracts.md +166 -0
  14. package/.agent/agents/swarm-worker-registry.md +92 -0
  15. package/.agent/rules/GEMINI.md +134 -5
  16. package/.agent/scripts/bundle_analyzer.py +259 -0
  17. package/.agent/scripts/dependency_analyzer.py +247 -0
  18. package/.agent/scripts/lint_runner.py +188 -0
  19. package/.agent/scripts/patch_skills_meta.py +177 -0
  20. package/.agent/scripts/patch_skills_output.py +285 -0
  21. package/.agent/scripts/schema_validator.py +279 -0
  22. package/.agent/scripts/security_scan.py +224 -0
  23. package/.agent/scripts/session_manager.py +144 -3
  24. package/.agent/scripts/skill_integrator.py +234 -0
  25. package/.agent/scripts/strengthen_skills.py +220 -0
  26. package/.agent/scripts/swarm_dispatcher.py +317 -0
  27. package/.agent/scripts/test_runner.py +192 -0
  28. package/.agent/scripts/test_swarm_dispatcher.py +163 -0
  29. package/.agent/skills/agent-organizer/SKILL.md +132 -0
  30. package/.agent/skills/agentic-patterns/SKILL.md +335 -0
  31. package/.agent/skills/api-patterns/SKILL.md +226 -50
  32. package/.agent/skills/app-builder/SKILL.md +215 -52
  33. package/.agent/skills/architecture/SKILL.md +176 -31
  34. package/.agent/skills/bash-linux/SKILL.md +150 -134
  35. package/.agent/skills/behavioral-modes/SKILL.md +152 -160
  36. package/.agent/skills/brainstorming/SKILL.md +148 -101
  37. package/.agent/skills/brainstorming/dynamic-questioning.md +10 -0
  38. package/.agent/skills/clean-code/SKILL.md +139 -134
  39. package/.agent/skills/code-review-checklist/SKILL.md +177 -80
  40. package/.agent/skills/config-validator/SKILL.md +165 -0
  41. package/.agent/skills/csharp-developer/SKILL.md +107 -0
  42. package/.agent/skills/database-design/SKILL.md +252 -29
  43. package/.agent/skills/deployment-procedures/SKILL.md +122 -175
  44. package/.agent/skills/devops-engineer/SKILL.md +134 -0
  45. package/.agent/skills/devops-incident-responder/SKILL.md +98 -0
  46. package/.agent/skills/documentation-templates/SKILL.md +175 -121
  47. package/.agent/skills/dotnet-core-expert/SKILL.md +103 -0
  48. package/.agent/skills/edge-computing/SKILL.md +213 -0
  49. package/.agent/skills/frontend-design/SKILL.md +76 -0
  50. package/.agent/skills/frontend-design/color-system.md +18 -0
  51. package/.agent/skills/frontend-design/typography-system.md +18 -0
  52. package/.agent/skills/game-development/SKILL.md +69 -0
  53. package/.agent/skills/geo-fundamentals/SKILL.md +158 -99
  54. package/.agent/skills/i18n-localization/SKILL.md +158 -96
  55. package/.agent/skills/intelligent-routing/SKILL.md +89 -285
  56. package/.agent/skills/intelligent-routing/router-manifest.md +65 -0
  57. package/.agent/skills/lint-and-validate/SKILL.md +229 -27
  58. package/.agent/skills/llm-engineering/SKILL.md +258 -0
  59. package/.agent/skills/local-first/SKILL.md +203 -0
  60. package/.agent/skills/mcp-builder/SKILL.md +159 -111
  61. package/.agent/skills/mobile-design/SKILL.md +102 -282
  62. package/.agent/skills/nextjs-react-expert/SKILL.md +143 -227
  63. package/.agent/skills/nodejs-best-practices/SKILL.md +201 -254
  64. package/.agent/skills/observability/SKILL.md +285 -0
  65. package/.agent/skills/parallel-agents/SKILL.md +124 -118
  66. package/.agent/skills/performance-profiling/SKILL.md +143 -89
  67. package/.agent/skills/plan-writing/SKILL.md +133 -97
  68. package/.agent/skills/platform-engineer/SKILL.md +135 -0
  69. package/.agent/skills/powershell-windows/SKILL.md +167 -104
  70. package/.agent/skills/python-patterns/SKILL.md +149 -361
  71. package/.agent/skills/python-pro/SKILL.md +114 -0
  72. package/.agent/skills/react-specialist/SKILL.md +107 -0
  73. package/.agent/skills/realtime-patterns/SKILL.md +296 -0
  74. package/.agent/skills/red-team-tactics/SKILL.md +136 -134
  75. package/.agent/skills/rust-pro/SKILL.md +237 -173
  76. package/.agent/skills/seo-fundamentals/SKILL.md +134 -82
  77. package/.agent/skills/server-management/SKILL.md +155 -104
  78. package/.agent/skills/sql-pro/SKILL.md +104 -0
  79. package/.agent/skills/systematic-debugging/SKILL.md +156 -79
  80. package/.agent/skills/tailwind-patterns/SKILL.md +163 -205
  81. package/.agent/skills/tdd-workflow/SKILL.md +148 -88
  82. package/.agent/skills/test-result-analyzer/SKILL.md +299 -0
  83. package/.agent/skills/testing-patterns/SKILL.md +141 -114
  84. package/.agent/skills/trend-researcher/SKILL.md +228 -0
  85. package/.agent/skills/ui-ux-pro-max/SKILL.md +107 -0
  86. package/.agent/skills/ui-ux-researcher/SKILL.md +234 -0
  87. package/.agent/skills/vue-expert/SKILL.md +118 -0
  88. package/.agent/skills/vulnerability-scanner/SKILL.md +228 -188
  89. package/.agent/skills/web-design-guidelines/SKILL.md +148 -33
  90. package/.agent/skills/webapp-testing/SKILL.md +171 -122
  91. package/.agent/skills/whimsy-injector/SKILL.md +349 -0
  92. package/.agent/skills/workflow-optimizer/SKILL.md +219 -0
  93. package/.agent/workflows/api-tester.md +279 -0
  94. package/.agent/workflows/audit.md +168 -0
  95. package/.agent/workflows/brainstorm.md +65 -19
  96. package/.agent/workflows/changelog.md +144 -0
  97. package/.agent/workflows/create.md +67 -14
  98. package/.agent/workflows/debug.md +122 -30
  99. package/.agent/workflows/deploy.md +82 -31
  100. package/.agent/workflows/enhance.md +59 -27
  101. package/.agent/workflows/fix.md +143 -0
  102. package/.agent/workflows/generate.md +84 -20
  103. package/.agent/workflows/migrate.md +163 -0
  104. package/.agent/workflows/orchestrate.md +66 -17
  105. package/.agent/workflows/performance-benchmarker.md +305 -0
  106. package/.agent/workflows/plan.md +76 -33
  107. package/.agent/workflows/preview.md +73 -17
  108. package/.agent/workflows/refactor.md +153 -0
  109. package/.agent/workflows/review-ai.md +140 -0
  110. package/.agent/workflows/review.md +83 -16
  111. package/.agent/workflows/session.md +154 -0
  112. package/.agent/workflows/status.md +74 -18
  113. package/.agent/workflows/strengthen-skills.md +99 -0
  114. package/.agent/workflows/swarm.md +194 -0
  115. package/.agent/workflows/test.md +80 -31
  116. package/.agent/workflows/tribunal-backend.md +55 -13
  117. package/.agent/workflows/tribunal-database.md +62 -18
  118. package/.agent/workflows/tribunal-frontend.md +58 -12
  119. package/.agent/workflows/tribunal-full.md +70 -11
  120. package/.agent/workflows/tribunal-mobile.md +123 -0
  121. package/.agent/workflows/tribunal-performance.md +152 -0
  122. package/.agent/workflows/ui-ux-pro-max.md +100 -82
  123. package/README.md +117 -62
  124. package/bin/tribunal-kit.js +329 -75
  125. package/package.json +10 -6
@@ -0,0 +1,285 @@
1
+ ---
2
+ name: observability
3
+ description: Production observability principles. OpenTelemetry traces, structured logs, metrics, SLOs/SLIs/error budgets, and AI observability. Use when setting up monitoring, debugging production issues, or designing observable distributed systems.
4
+ allowed-tools: Read, Write, Edit, Glob, Grep
5
+ version: 1.0.0
6
+ last-updated: 2026-03-12
7
+ applies-to-model: gemini-2.5-pro, claude-3-7-sonnet
8
+ ---
9
+
10
+ # Observability Principles
11
+
12
+ > Monitoring tells you when something is broken.
13
+ > Observability tells you why.
14
+
15
+ ---
16
+
17
+ ## The Three Pillars
18
+
19
+ ```
20
+ TRACES → The journey of a single request across services
21
+ "Why was THIS request slow?"
22
+
23
+ LOGS → Discrete events with context
24
+ "What exactly happened at 14:23:07?"
25
+
26
+ METRICS → Aggregated measurements over time
27
+ "What is our error rate over the last hour?"
28
+ ```
29
+
30
+ Use all three. They answer different questions. None replaces the others.
31
+
32
+ ---
33
+
34
+ ## OpenTelemetry: The Standard
35
+
36
+ OpenTelemetry (OTel) is the vendor-neutral standard for instrumentation. Use it and you can swap backends (Jaeger, Grafana Tempo, Honeycomb, Datadog) without changing application code.
37
+
38
+ ```ts
39
+ // src/instrumentation.ts — initialize OTel once, before app code
40
+ import { NodeSDK } from '@opentelemetry/sdk-node';
41
+ import { OTLPTraceExporter } from '@opentelemetry/exporter-trace-otlp-http';
42
+ import { Resource } from '@opentelemetry/resources';
43
+ import { SemanticResourceAttributes } from '@opentelemetry/semantic-conventions';
44
+
45
+ const sdk = new NodeSDK({
46
+ resource: new Resource({
47
+ [SemanticResourceAttributes.SERVICE_NAME]: 'my-api',
48
+ [SemanticResourceAttributes.SERVICE_VERSION]: '1.0.0',
49
+ }),
50
+ traceExporter: new OTLPTraceExporter({
51
+ url: process.env.OTEL_EXPORTER_OTLP_ENDPOINT,
52
+ }),
53
+ });
54
+
55
+ sdk.start();
56
+ process.on('SIGTERM', () => sdk.shutdown());
57
+ ```
58
+
59
+ ---
60
+
61
+ ## Distributed Tracing
62
+
63
+ Traces connect the dots across microservice boundaries:
64
+
65
+ ```ts
66
+ import { trace, context, SpanStatusCode } from '@opentelemetry/api';
67
+
68
+ const tracer = trace.getTracer('payment-service');
69
+
70
+ async function processPayment(orderId: string, amount: number) {
71
+ return tracer.startActiveSpan('payment.process', async (span) => {
72
+ try {
73
+ // Add business context to the span
74
+ span.setAttributes({
75
+ 'order.id': orderId,
76
+ 'payment.amount': amount,
77
+ 'payment.currency': 'USD',
78
+ });
79
+
80
+ const result = await chargeCard(orderId, amount);
81
+
82
+ span.setStatus({ code: SpanStatusCode.OK });
83
+ return result;
84
+ } catch (err) {
85
+ // Record the error with full context
86
+ span.recordException(err as Error);
87
+ span.setStatus({ code: SpanStatusCode.ERROR, message: (err as Error).message });
88
+ throw err;
89
+ } finally {
90
+ span.end();
91
+ }
92
+ });
93
+ }
94
+ ```
95
+
96
+ ---
97
+
98
+ ## Structured Logging
99
+
100
+ Logs must be machine-parseable:
101
+
102
+ ```ts
103
+ // ❌ Unstructured — impossible to query, filter, or alert on
104
+ console.log(`User ${userId} failed to login at ${new Date()}`);
105
+
106
+ // ✅ Structured — every field is queryable
107
+ logger.warn({
108
+ event: 'auth.login_failed',
109
+ userId,
110
+ reason: 'invalid_password',
111
+ attemptCount: 3,
112
+ ip: req.ip,
113
+ timestamp: new Date().toISOString(),
114
+ });
115
+ ```
116
+
117
+ ### What to Always Log
118
+
119
+ | Always | Never |
120
+ |---|---|
121
+ | Request ID / trace ID | Passwords or password hashes |
122
+ | User ID (not PII) | Credit card numbers |
123
+ | Error type + message | API keys or tokens |
124
+ | Duration (ms) | Full request bodies (may contain PII) |
125
+ | HTTP status code | |
126
+
127
+ ---
128
+
129
+ ## Metrics: What to Measure
130
+
131
+ The four golden signals (Google SRE):
132
+
133
+ ```
134
+ 1. LATENCY — How long does serving a request take?
135
+ Track p50, p95, p99 — not just average
136
+ Average hides the worst-case user experience
137
+
138
+ 2. TRAFFIC — How much demand is there?
139
+ requests/sec, messages/sec, bytes/sec
140
+
141
+ 3. ERRORS — What fraction of requests are failing?
142
+ HTTP 5xx rate, exception rate, timeout rate
143
+
144
+ 4. SATURATION — How "full" is your service?
145
+ CPU %, memory %, queue depth
146
+ ```
147
+
148
+ ---
149
+
150
+ ## SLOs / SLIs / Error Budgets
151
+
152
+ The framework that connects technical work to business reliability:
153
+
154
+ ```
155
+ SLI (Service Level Indicator) — a specific, measurable signal:
156
+ "HTTP 200 responses as % of all responses to /api/checkout"
157
+
158
+ SLO (Service Level Objective) — your reliability promise:
159
+ "99.9% of checkout requests succeed over a 30-day window"
160
+
161
+ Error Budget — how much unreliability you can afford:
162
+ "30 days × 0.1% error tolerance = 43.2 minutes of downtime allowed"
163
+
164
+ Error Budget Policy:
165
+ Budget healthy → ship new features freely
166
+ Budget depleted → freeze releases, focus only on reliability
167
+ ```
168
+
169
+ ---
170
+
171
+ ## AI Observability
172
+
173
+ Standard metrics don't cover AI systems. Add these:
174
+
175
+ ```ts
176
+ // Track every AI call with these dimensions
177
+ logger.info({
178
+ event: 'ai.completion',
179
+ model: 'gpt-4o',
180
+ prompt_tokens: response.usage.prompt_tokens,
181
+ completion_tokens: response.usage.completion_tokens,
182
+ total_tokens: response.usage.total_tokens,
183
+ latency_ms: duration,
184
+ cost_usd: calculateCost(model, usage),
185
+ trace_id: currentTraceId(),
186
+
187
+ // Eval scores (from async evaluation pipeline)
188
+ eval_faithfulness: 0.92, // Did output match sources?
189
+ eval_relevance: 0.88, // Did output answer the question?
190
+ });
191
+ ```
192
+
193
+ ### AI-Specific Alerts
194
+
195
+ ```
196
+ 🚨 TOKEN COST SPIKE → cost per request > 2x trailing average → alert
197
+ 🚨 LATENCY DEGRADATION → p95 LLM latency > 5s → alert
198
+ 🚨 EVAL SCORE DECLINE → faithfulness drops below 0.8 (model drift?) → alert
199
+ 🚨 ERROR RATE SPIKE → 429s or context_length errors > 5% → alert
200
+ ```
201
+
202
+ ---
203
+
204
+ ## Output Format
205
+
206
+ When this skill produces a recommendation or design decision, structure your output as:
207
+
208
+ ```
209
+ ━━━ Observability Recommendation ━━━━━━━━━━━━━━━━
210
+ Decision: [what was chosen / proposed]
211
+ Rationale: [why — one concise line]
212
+ Trade-offs: [what is consciously accepted]
213
+ Next action: [concrete next step for the user]
214
+ ─────────────────────────────────────────────────
215
+ Pre-Flight: ✅ All checks passed
216
+ or ❌ [blocking item that must be resolved first]
217
+ ```
218
+
219
+
220
+ ---
221
+
222
+ ## 🏛️ Tribunal Integration (Anti-Hallucination)
223
+
224
+ **Slash command: `/tribunal-backend`**
225
+ **Active reviewers: `logic` · `security` · `performance`**
226
+
227
+ ### ❌ Forbidden AI Tropes in Observability
228
+
229
+ 1. **Logging sensitive data** — never log request bodies wholesale — they contain passwords, tokens, PII. Log only specific, safe fields.
230
+ 2. **Tracking averages only** — `avg(latency)` hides the 1% of users who get 10x worse experience. Always use percentiles (p95, p99).
231
+ 3. **Near-100% SLO targets** — `99.999%` (or 100%) SLOs are wrong for most services. They leave almost no error budget (about 26 seconds per 30 days), so a single incident exhausts it and paralyzes product velocity.
232
+ 4. **Inventing OTel packages** — only use `@opentelemetry/{sdk-node,api,exporter-*}` from the official `@opentelemetry` npm org.
233
+
234
+ ### ✅ Pre-Flight Self-Audit
235
+
236
+ ```
237
+ ✅ Are logs structured JSON (not string-interpolated messages)?
238
+ ✅ Is no PII or credential data being logged?
239
+ ✅ Are latency measurements tracking percentiles (p95/p99), not just averages?
240
+ ✅ Does every async operation have a trace span with error recording?
241
+ ✅ Are AI calls instrumented with token count + cost + latency tracking?
242
+ ✅ Is there an SLO defined with an explicit error budget policy?
243
+ ```
244
+
245
+
246
+ ---
247
+
248
+ ## 🤖 LLM-Specific Traps
249
+
250
+ AI coding assistants often fall into specific bad habits when dealing with this domain. These are strictly forbidden:
251
+
252
+ 1. **Over-engineering:** Proposing complex abstractions or distributed systems when a simpler approach suffices.
253
+ 2. **Hallucinated Libraries/Methods:** Using non-existent methods or packages. Always `// VERIFY` or check `package.json` / `requirements.txt`.
254
+ 3. **Skipping Edge Cases:** Writing the "happy path" and ignoring error handling, timeouts, or data validation.
255
+ 4. **Context Amnesia:** Forgetting the user's constraints and offering generic advice instead of tailored solutions.
256
+ 5. **Silent Degradation:** Catching and suppressing errors without logging or re-raising.
257
+
258
+ ---
259
+
260
+ ## 🏛️ Tribunal Integration (Anti-Hallucination)
261
+
262
+ **Slash command: `/review` or `/tribunal-full`**
263
+ **Active reviewers: `logic-reviewer` · `security-auditor`**
264
+
265
+ ### ❌ Forbidden AI Tropes
266
+
267
+ 1. **Blind Assumptions:** Never make an assumption without documenting it clearly with `// VERIFY: [reason]`.
268
+ 2. **Silent Degradation:** Catching and suppressing errors without logging or handling.
269
+ 3. **Context Amnesia:** Forgetting the user's constraints and offering generic advice instead of tailored solutions.
270
+
271
+ ### ✅ Pre-Flight Self-Audit
272
+
273
+ Review these questions before confirming output:
274
+ ```
275
+ ✅ Did I rely ONLY on real, verified tools and methods?
276
+ ✅ Is this solution appropriately scoped to the user's constraints?
277
+ ✅ Did I handle potential failure modes and edge cases?
278
+ ✅ Have I avoided generic boilerplate that doesn't add value?
279
+ ```
280
+
281
+ ### 🛑 Verification-Before-Completion (VBC) Protocol
282
+
283
+ **CRITICAL:** You must follow a strict "evidence-based closeout" state machine.
284
+ - ❌ **Forbidden:** Declaring a task complete because the output "looks correct."
285
+ - ✅ **Required:** Before finalizing any task, provide **concrete evidence** (terminal output, passing tests, compile success, or equivalent proof) that your output works as intended.
@@ -1,175 +1,181 @@
1
1
  ---
2
2
  name: parallel-agents
3
3
  description: Multi-agent orchestration patterns. Use when multiple independent tasks can run with different domain expertise or when comprehensive analysis requires multiple perspectives.
4
- allowed-tools: Read, Glob, Grep
4
+ allowed-tools: Read, Write, Edit, Glob, Grep
5
+ version: 1.0.0
6
+ last-updated: 2026-03-12
7
+ applies-to-model: gemini-2.5-pro, claude-3-7-sonnet
5
8
  ---
6
9
 
7
- # Native Parallel Agents
10
+ # Multi-Agent Orchestration
8
11
 
9
- > Orchestration through Antigravity's built-in Agent Tool
12
+ > Parallel agents are faster. They are also harder to keep consistent.
13
+ > Coordinate them — don't just fire them simultaneously and hope for compatible outputs.
10
14
 
11
- ## Overview
12
-
13
- This skill enables coordinating multiple specialized agents through Antigravity's native agent system. Unlike external scripts, this approach keeps all orchestration within Antigravity's control.
15
+ ---
14
16
 
15
- ## When to Use Orchestration
17
+ ## When to Use Parallel Agents
16
18
 
17
- **Good for:**
18
- - Complex tasks requiring multiple expertise domains
19
- - Code analysis from security, performance, and quality perspectives
20
- - Comprehensive reviews (architecture + security + testing)
21
- - Feature implementation needing backend + frontend + database work
19
+ Use multiple agents when:
20
+ - Tasks are genuinely **independent** (output of A doesn't feed input of B)
21
+ - Different tasks require **different domain expertise**
22
+ - Comprehensive **review** needs multiple specialist perspectives simultaneously
23
+ - Speed matters and tasks can be assigned and awaited independently
22
24
 
23
- **Not for:**
24
- - Simple, single-domain tasks
25
- - Quick fixes or small changes
26
- - Tasks where one agent suffices
25
+ Do **not** use parallel agents when:
26
+ - Tasks have sequential dependencies (you need the result to start the next)
27
+ - The overhead of coordination exceeds the time saved
27
28
 
28
29
  ---
29
30
 
30
- ## Native Agent Invocation
31
+ ## Orchestration Patterns
31
32
 
32
- ### Single Agent
33
- ```
34
- Use the security-auditor agent to review authentication
35
- ```
33
+ ### Pattern 1 — Parallel Review (Tribunal)
36
34
 
37
- ### Sequential Chain
38
- ```
39
- First, use the explorer-agent to discover project structure.
40
- Then, use the backend-specialist to review API endpoints.
41
- Finally, use the test-engineer to identify test gaps.
42
- ```
35
+ Multiple reviewers look at the same code simultaneously, each from a different angle.
43
36
 
44
- ### With Context Passing
45
- ```
46
- Use the frontend-specialist to analyze React components.
47
- Based on those findings, have the test-engineer generate component tests.
48
37
  ```
38
+ Code (input)
39
+ ├── → logic-reviewer → finds logic errors
40
+ ├── → security-auditor → finds vulnerabilities
41
+ ├── → type-safety-reviewer → finds type-unsafe code
42
+ └── → performance-reviewer → finds bottlenecks
49
43
 
50
- ### Resume Previous Work
51
- ```
52
- Resume agent [agentId] and continue with additional requirements.
44
+ All verdicts synthesize → Human Gate (approve/reject/revise)
53
45
  ```
54
46
 
55
- ---
47
+ **When:** `/tribunal-*` commands, code review before merge
56
48
 
57
- ## Orchestration Patterns
49
+ ### Pattern 2 — Domain Specialization
50
+
51
+ Different specialists handle different parts of the same task simultaneously.
58
52
 
59
- ### Pattern 1: Comprehensive Analysis
60
53
  ```
61
- Agents: explorer-agent [domain-agents] synthesis
62
-
63
- 1. explorer-agent: Map codebase structure
64
- 2. security-auditor: Security posture
65
- 3. backend-specialist: API quality
66
- 4. frontend-specialist: UI/UX patterns
67
- 5. test-engineer: Test coverage
68
- 6. Synthesize all findings
54
+ "Build a user auth system" (input)
55
+ ├── → backend-specialist → API routes + JWT logic
56
+ ├── → frontend-specialist → Login/register UI
57
+ └── → database-architect → User schema + sessions table
58
+
59
+ All outputs → orchestrator synthesizes into coherent system
60
+ (ensures API contract matches what frontend calls,
61
+ and DB schema matches what backend queries)
69
62
  ```
70
63
 
71
- ### Pattern 2: Feature Review
72
- ```
73
- Agents: affected-domain-agents → test-engineer
64
+ **When:** Full-stack feature builds via `/orchestrate`
74
65
 
75
- 1. Identify affected domains (backend? frontend? both?)
76
- 2. Invoke relevant domain agents
77
- 3. test-engineer verifies changes
78
- 4. Synthesize recommendations
79
- ```
66
+ ### Pattern 3 — Sequential with Parallel Phases
67
+
68
+ Some tasks are inherently sequential at the macro level but can parallelize within each phase.
80
69
 
81
- ### Pattern 3: Security Audit
82
70
  ```
83
- Agents: security-auditor → penetration-tester → synthesis
71
+ Phase 1 (sequential):
72
+ database-architect → schema design
84
73
 
85
- 1. security-auditor: Configuration and code review
86
- 2. penetration-tester: Active vulnerability testing
87
- 3. Synthesize with prioritized remediation
74
+ Phase 2 (parallel, after Phase 1):
75
+ backend-specialist → API uses schema from Phase 1
76
+ frontend-specialist → UI uses API contract from Phase 2a (estimated)
77
+
78
+ Phase 3 (sequential, after Phase 2):
79
+ test-engineer → E2E tests with real API + UI
88
80
  ```
89
81
 
90
82
  ---
91
83
 
92
- ## Available Agents
93
-
94
- | Agent | Expertise | Trigger Phrases |
95
- |-------|-----------|-----------------|
96
- | `orchestrator` | Coordination | "comprehensive", "multi-perspective" |
97
- | `security-auditor` | Security | "security", "auth", "vulnerabilities" |
98
- | `penetration-tester` | Security Testing | "pentest", "red team", "exploit" |
99
- | `backend-specialist` | Backend | "API", "server", "Node.js", "Express" |
100
- | `frontend-specialist` | Frontend | "React", "UI", "components", "Next.js" |
101
- | `test-engineer` | Testing | "tests", "coverage", "TDD" |
102
- | `devops-engineer` | DevOps | "deploy", "CI/CD", "infrastructure" |
103
- | `database-architect` | Database | "schema", "Prisma", "migrations" |
104
- | `mobile-developer` | Mobile | "React Native", "Flutter", "mobile" |
105
- | `api-designer` | API Design | "REST", "GraphQL", "OpenAPI" |
106
- | `debugger` | Debugging | "bug", "error", "not working" |
107
- | `explorer-agent` | Discovery | "explore", "map", "structure" |
108
- | `documentation-writer` | Documentation | "write docs", "create README", "generate API docs" |
109
- | `performance-optimizer` | Performance | "slow", "optimize", "profiling" |
110
- | `project-planner` | Planning | "plan", "roadmap", "milestones" |
111
- | `seo-specialist` | SEO | "SEO", "meta tags", "search ranking" |
112
- | `game-developer` | Game Development | "game", "Unity", "Godot", "Phaser" |
84
+ ## Orchestrator Responsibilities
85
+
86
+ The orchestrator coordinates agents. It:
87
+
88
+ 1. **Assigns scope** — each agent gets exactly what it needs, nothing more
89
+ 2. **Manages state** — passes the right outputs from each agent to the next that needs them
90
+ 3. **Resolves conflicts** — when two agents propose incompatible solutions, the orchestrator decides or asks the user
91
+ 4. **Verifies consistency** — ensures that the API contract the backend builds matches what the frontend calls
113
92
 
114
93
  ---
115
94
 
116
- ## Antigravity Built-in Agents
95
+ ## Consistency Rules for Multi-Agent Output
117
96
 
118
- These work alongside custom agents:
97
+ The biggest failure in parallel agent work is **inconsistency at boundaries**:
119
98
 
120
- | Agent | Model | Purpose |
121
- |-------|-------|---------|
122
- | **Explore** | Haiku | Fast read-only codebase search |
123
- | **Plan** | Sonnet | Research during plan mode |
124
- | **General-purpose** | Sonnet | Complex multi-step modifications |
99
+ - Backend generates `userId` but frontend calls it `user_id`
100
+ - Database schema has `user_email` but backend queries `email`
101
+ - Agent A designs one error shape; Agent B assumes a different one
125
102
 
126
- Use **Explore** for quick searches, **custom agents** for domain expertise.
103
+ **Prevention:**
104
+ - Establish contracts (types, schemas, API shapes) **before** parallel work begins
105
+ - Each agent receives the shared contract as context
106
+ - Orchestrator reviews all outputs for boundary consistency before presenting to user
127
107
 
128
108
  ---
129
109
 
130
- ## Synthesis Protocol
110
+ ## Communication Format Between Agents
131
111
 
132
- After all agents complete, synthesize:
112
+ When one agent's output feeds another:
133
113
 
134
- ```markdown
135
- ## Orchestration Synthesis
114
+ ```
115
+ [AGENT: backend-specialist OUTPUT]
116
+ API Contract:
117
+ POST /api/users → { id: string, email: string, createdAt: string }
118
+ POST /api/auth/login → { token: string, expiresAt: string }
136
119
 
137
- ### Task Summary
138
- [What was accomplished]
120
+ [AGENT: frontend-specialist RECEIVES]
121
+ Use the above API contract. Build the UI to match these exact request/response shapes.
122
+ ```
123
+
124
+ ---
139
125
 
140
- ### Agent Contributions
141
- | Agent | Finding |
142
- |-------|---------|
143
- | security-auditor | Found X |
144
- | backend-specialist | Identified Y |
126
+ ## Output Format
145
127
 
146
- ### Consolidated Recommendations
147
- 1. **Critical**: [Issue from Agent A]
148
- 2. **Important**: [Issue from Agent B]
149
- 3. **Nice-to-have**: [Enhancement from Agent C]
128
+ When this skill completes a task, structure your output as:
150
129
 
151
- ### Action Items
152
- - [ ] Fix critical security issue
153
- - [ ] Refactor API endpoint
154
- - [ ] Add missing tests
155
130
  ```
131
+ ━━━ Parallel Agents Output ━━━━━━━━━━━━━━━━━━━━━━━━
132
+ Task: [what was performed]
133
+ Result: [outcome summary — one line]
134
+ ─────────────────────────────────────────────────
135
+ Checks: ✅ [N passed] · ⚠️ [N warnings] · ❌ [N blocked]
136
+ VBC status: PENDING → VERIFIED
137
+ Evidence: [link to terminal output, test result, or file diff]
138
+ ```
139
+
140
+
156
141
 
157
142
  ---
158
143
 
159
- ## Best Practices
144
+ ## 🤖 LLM-Specific Traps
145
+
146
+ AI coding assistants often fall into specific bad habits when dealing with this domain. These are strictly forbidden:
160
147
 
161
- 1. **Available agents** - 17 specialized agents can be orchestrated
162
- 2. **Logical order** - Discovery Analysis Implementation Testing
163
- 3. **Share context** - Pass relevant findings to subsequent agents
164
- 4. **Single synthesis** - One unified report, not separate outputs
165
- 5. **Verify changes** - Always include test-engineer for code modifications
148
+ 1. **Over-engineering:** Proposing complex abstractions or distributed systems when a simpler approach suffices.
149
+ 2. **Hallucinated Libraries/Methods:** Using non-existent methods or packages. Always `// VERIFY` or check `package.json` / `requirements.txt`.
150
+ 3. **Skipping Edge Cases:** Writing the "happy path" and ignoring error handling, timeouts, or data validation.
151
+ 4. **Context Amnesia:** Forgetting the user's constraints and offering generic advice instead of tailored solutions.
152
+ 5. **Silent Degradation:** Catching and suppressing errors without logging or re-raising.
166
153
 
167
154
  ---
168
155
 
169
- ## Key Benefits
156
+ ## 🏛️ Tribunal Integration (Anti-Hallucination)
157
+
158
+ **Slash command: `/review` or `/tribunal-full`**
159
+ **Active reviewers: `logic-reviewer` · `security-auditor`**
160
+
161
+ ### ❌ Forbidden AI Tropes
162
+
163
+ 1. **Blind Assumptions:** Never make an assumption without documenting it clearly with `// VERIFY: [reason]`.
164
+ 2. **Silent Degradation:** Catching and suppressing errors without logging or handling.
165
+ 3. **Context Amnesia:** Forgetting the user's constraints and offering generic advice instead of tailored solutions.
166
+
167
+ ### ✅ Pre-Flight Self-Audit
168
+
169
+ Review these questions before confirming output:
170
+ ```
171
+ ✅ Did I rely ONLY on real, verified tools and methods?
172
+ ✅ Is this solution appropriately scoped to the user's constraints?
173
+ ✅ Did I handle potential failure modes and edge cases?
174
+ ✅ Have I avoided generic boilerplate that doesn't add value?
175
+ ```
176
+
177
+ ### 🛑 Verification-Before-Completion (VBC) Protocol
170
178
 
171
- - **Single session** - All agents share context
172
- - **AI-controlled** - Claude orchestrates autonomously
173
- - ✅ **Native integration** - Works with built-in Explore, Plan agents
174
- - ✅ **Resume support** - Can continue previous agent work
175
- - ✅ **Context passing** - Findings flow between agents
179
+ **CRITICAL:** You must follow a strict "evidence-based closeout" state machine.
180
+ - ❌ **Forbidden:** Declaring a task complete because the output "looks correct."
181
+ - ✅ **Required:** Before finalizing any task, provide **concrete evidence** (terminal output, passing tests, compile success, or equivalent proof) that your output works as intended.