@vfarcic/dot-ai 0.5.0 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (145) hide show
  1. package/.claude/commands/context-load.md +11 -0
  2. package/.claude/commands/context-save.md +16 -0
  3. package/.claude/commands/prd-done.md +115 -0
  4. package/.claude/commands/prd-get.md +25 -0
  5. package/.claude/commands/prd-start.md +87 -0
  6. package/.claude/commands/task-done.md +77 -0
  7. package/.claude/commands/tests-reminder.md +32 -0
  8. package/.claude/settings.local.json +20 -0
  9. package/.eslintrc.json +25 -0
  10. package/.github/workflows/ci.yml +170 -0
  11. package/.prettierrc.json +10 -0
  12. package/.teller.yml +8 -0
  13. package/CLAUDE.md +162 -0
  14. package/assets/images/logo.png +0 -0
  15. package/bin/dot-ai.ts +47 -0
  16. package/destroy.sh +45 -0
  17. package/devbox.json +13 -0
  18. package/devbox.lock +225 -0
  19. package/docs/API.md +449 -0
  20. package/docs/CONTEXT.md +49 -0
  21. package/docs/DEVELOPMENT.md +203 -0
  22. package/docs/NEXT_STEPS.md +97 -0
  23. package/docs/STAGE_BASED_API.md +97 -0
  24. package/docs/cli-guide.md +798 -0
  25. package/docs/design.md +750 -0
  26. package/docs/discovery-engine.md +515 -0
  27. package/docs/error-handling.md +429 -0
  28. package/docs/function-registration.md +157 -0
  29. package/docs/mcp-guide.md +416 -0
  30. package/package.json +2 -121
  31. package/renovate.json +51 -0
  32. package/setup.sh +111 -0
  33. package/{dist/cli.js → src/cli.ts} +26 -19
  34. package/src/core/claude.ts +280 -0
  35. package/src/core/deploy-operation.ts +127 -0
  36. package/src/core/discovery.ts +900 -0
  37. package/src/core/error-handling.ts +562 -0
  38. package/src/core/index.ts +143 -0
  39. package/src/core/kubernetes-utils.ts +218 -0
  40. package/src/core/memory.ts +148 -0
  41. package/src/core/schema.ts +830 -0
  42. package/src/core/session-utils.ts +97 -0
  43. package/src/core/workflow.ts +234 -0
  44. package/src/index.ts +18 -0
  45. package/src/interfaces/cli.ts +872 -0
  46. package/src/interfaces/mcp.ts +183 -0
  47. package/src/mcp/server.ts +131 -0
  48. package/src/tools/answer-question.ts +807 -0
  49. package/src/tools/choose-solution.ts +169 -0
  50. package/src/tools/deploy-manifests.ts +94 -0
  51. package/src/tools/generate-manifests.ts +502 -0
  52. package/src/tools/index.ts +41 -0
  53. package/src/tools/recommend.ts +370 -0
  54. package/tests/__mocks__/@kubernetes/client-node.ts +106 -0
  55. package/tests/build-system.test.ts +345 -0
  56. package/tests/configuration.test.ts +226 -0
  57. package/tests/core/deploy-operation.test.ts +38 -0
  58. package/tests/core/discovery.test.ts +1648 -0
  59. package/tests/core/error-handling.test.ts +632 -0
  60. package/tests/core/schema.test.ts +1658 -0
  61. package/tests/core/session-utils.test.ts +245 -0
  62. package/tests/core.test.ts +439 -0
  63. package/tests/fixtures/configmap-no-labels.yaml +8 -0
  64. package/tests/fixtures/crossplane-app-configuration.yaml +6 -0
  65. package/tests/fixtures/crossplane-providers.yaml +45 -0
  66. package/tests/fixtures/crossplane-rbac.yaml +48 -0
  67. package/tests/fixtures/invalid-configmap.yaml +8 -0
  68. package/tests/fixtures/invalid-deployment.yaml +17 -0
  69. package/tests/fixtures/test-deployment.yaml +28 -0
  70. package/tests/fixtures/valid-configmap.yaml +15 -0
  71. package/tests/infrastructure.test.ts +426 -0
  72. package/tests/interfaces/cli.test.ts +1036 -0
  73. package/tests/interfaces/mcp.test.ts +139 -0
  74. package/tests/kubernetes-utils.test.ts +200 -0
  75. package/tests/mcp/server.test.ts +126 -0
  76. package/tests/setup.ts +31 -0
  77. package/tests/tools/answer-question.test.ts +367 -0
  78. package/tests/tools/choose-solution.test.ts +481 -0
  79. package/tests/tools/deploy-manifests.test.ts +185 -0
  80. package/tests/tools/generate-manifests.test.ts +441 -0
  81. package/tests/tools/index.test.ts +111 -0
  82. package/tests/tools/recommend.test.ts +180 -0
  83. package/tsconfig.json +34 -0
  84. package/dist/cli.d.ts +0 -3
  85. package/dist/cli.d.ts.map +0 -1
  86. package/dist/core/claude.d.ts +0 -42
  87. package/dist/core/claude.d.ts.map +0 -1
  88. package/dist/core/claude.js +0 -229
  89. package/dist/core/deploy-operation.d.ts +0 -38
  90. package/dist/core/deploy-operation.d.ts.map +0 -1
  91. package/dist/core/deploy-operation.js +0 -101
  92. package/dist/core/discovery.d.ts +0 -162
  93. package/dist/core/discovery.d.ts.map +0 -1
  94. package/dist/core/discovery.js +0 -758
  95. package/dist/core/error-handling.d.ts +0 -167
  96. package/dist/core/error-handling.d.ts.map +0 -1
  97. package/dist/core/error-handling.js +0 -399
  98. package/dist/core/index.d.ts +0 -42
  99. package/dist/core/index.d.ts.map +0 -1
  100. package/dist/core/index.js +0 -123
  101. package/dist/core/kubernetes-utils.d.ts +0 -38
  102. package/dist/core/kubernetes-utils.d.ts.map +0 -1
  103. package/dist/core/kubernetes-utils.js +0 -177
  104. package/dist/core/memory.d.ts +0 -45
  105. package/dist/core/memory.d.ts.map +0 -1
  106. package/dist/core/memory.js +0 -113
  107. package/dist/core/schema.d.ts +0 -187
  108. package/dist/core/schema.d.ts.map +0 -1
  109. package/dist/core/schema.js +0 -655
  110. package/dist/core/session-utils.d.ts +0 -29
  111. package/dist/core/session-utils.d.ts.map +0 -1
  112. package/dist/core/session-utils.js +0 -121
  113. package/dist/core/workflow.d.ts +0 -70
  114. package/dist/core/workflow.d.ts.map +0 -1
  115. package/dist/core/workflow.js +0 -161
  116. package/dist/index.d.ts +0 -15
  117. package/dist/index.d.ts.map +0 -1
  118. package/dist/index.js +0 -32
  119. package/dist/interfaces/cli.d.ts +0 -74
  120. package/dist/interfaces/cli.d.ts.map +0 -1
  121. package/dist/interfaces/cli.js +0 -769
  122. package/dist/interfaces/mcp.d.ts +0 -30
  123. package/dist/interfaces/mcp.d.ts.map +0 -1
  124. package/dist/interfaces/mcp.js +0 -105
  125. package/dist/mcp/server.d.ts +0 -9
  126. package/dist/mcp/server.d.ts.map +0 -1
  127. package/dist/mcp/server.js +0 -151
  128. package/dist/tools/answer-question.d.ts +0 -27
  129. package/dist/tools/answer-question.d.ts.map +0 -1
  130. package/dist/tools/answer-question.js +0 -696
  131. package/dist/tools/choose-solution.d.ts +0 -23
  132. package/dist/tools/choose-solution.d.ts.map +0 -1
  133. package/dist/tools/choose-solution.js +0 -171
  134. package/dist/tools/deploy-manifests.d.ts +0 -25
  135. package/dist/tools/deploy-manifests.d.ts.map +0 -1
  136. package/dist/tools/deploy-manifests.js +0 -74
  137. package/dist/tools/generate-manifests.d.ts +0 -23
  138. package/dist/tools/generate-manifests.d.ts.map +0 -1
  139. package/dist/tools/generate-manifests.js +0 -424
  140. package/dist/tools/index.d.ts +0 -11
  141. package/dist/tools/index.d.ts.map +0 -1
  142. package/dist/tools/index.js +0 -34
  143. package/dist/tools/recommend.d.ts +0 -23
  144. package/dist/tools/recommend.d.ts.map +0 -1
  145. package/dist/tools/recommend.js +0 -332
package/docs/design.md ADDED
@@ -0,0 +1,750 @@
1
+ # DevOps AI Toolkit Architecture & Design
2
+
3
+ ## Current Implementation Status
4
+
5
+ **🟢 IMPLEMENTED**: Resource Schema Parser & Validator with AI-powered recommendations
6
+ **🟢 IMPLEMENTED**: Stage-based MCP workflow with conversational deployment
7
+ **🟢 IMPLEMENTED**: AI-powered manifest generation with validation
8
+ **🟡 IN PROGRESS**: CLI interface with core discovery features
9
+ **🔴 PLANNED**: Deployment Engine, Governance System
10
+
11
+ ## Overview
12
+
13
+ DevOps AI Toolkit is an intelligent Kubernetes application deployment agent designed to operate in two modes:
14
+
15
+ 1. **✅ CLI Mode** (Current): Standalone command-line tool with AI-powered recommendations
16
+ 2. **✅ MCP Mode** (Implemented): Model Context Protocol server for conversational deployment workflow
17
+
18
+ The system implements a discovery-driven workflow powered by Claude AI, evolved from the original inspiration in `ORIGINAL_INSPIRATION.md`.
19
+
20
+ ## External Agent Integration Flow
21
+
22
+ ```mermaid
23
+ sequenceDiagram
24
+ participant User
25
+ participant ExternalAgent as External Agent<br/>(Claude Code, Cursor, etc.)
26
+ participant DotAI as DevOps AI Toolkit<br/>(Our System)
27
+ participant K8s as Kubernetes Cluster
28
+
29
+ User->>ExternalAgent: "Deploy a web app with auto-scaling"
30
+
31
+ Note over ExternalAgent,DotAI: Phase 1: Get Recommendations
32
+ ExternalAgent->>DotAI: recommend --intent "web app with auto-scaling"
33
+ DotAI->>K8s: Discover resources & schemas
34
+ K8s-->>DotAI: CRDs + Standard resources
35
+ DotAI->>DotAI: AI analysis & ranking
36
+ DotAI-->>ExternalAgent: Complete solution with questions
37
+
38
+ Note over ExternalAgent,User: Phase 2: Choose Solution
39
+ ExternalAgent->>User: Present ranked solutions with scores/descriptions
40
+ User-->>ExternalAgent: Select preferred solution
41
+ ExternalAgent->>DotAI: chooseSolution(selectedSolutionId)
42
+ DotAI-->>ExternalAgent: Configuration questions by stage
43
+
44
+ Note over ExternalAgent,User: Phase 3: Progressive Configuration
45
+ ExternalAgent->>User: Present required questions
46
+ User-->>ExternalAgent: Provide required answers
47
+ ExternalAgent->>DotAI: answerQuestion(stage="required", answers)
48
+ ExternalAgent->>User: Present basic questions (optional)
49
+ User-->>ExternalAgent: Provide basic answers or skip
50
+ ExternalAgent->>DotAI: answerQuestion(stage="basic", answers)
51
+ ExternalAgent->>User: Present advanced questions (optional)
52
+ User-->>ExternalAgent: Provide advanced answers or skip
53
+ ExternalAgent->>DotAI: answerQuestion(stage="advanced", answers)
54
+ ExternalAgent->>User: Ask for open requirements
55
+ User-->>ExternalAgent: "handle 1000 req/sec with SSL"
56
+ ExternalAgent->>DotAI: answerQuestion(stage="open", answers)
57
+
58
+ Note over ExternalAgent,DotAI: Phase 4: Manifest Generation
59
+ ExternalAgent->>DotAI: generateManifests(solutionId)
60
+ DotAI->>DotAI: AI creates complete manifests<br/>with additional resources for open requirements
61
+ DotAI-->>ExternalAgent: Production-ready Kubernetes YAML
62
+
63
+ Note over ExternalAgent,K8s: Phase 5: Deployment (Planned)
64
+ ExternalAgent->>DotAI: deploy --manifests manifests/
65
+ DotAI->>K8s: kubectl apply with monitoring
66
+ K8s-->>DotAI: Deployment status
67
+ DotAI-->>ExternalAgent: Success/failure with details
68
+ ExternalAgent-->>User: "✅ App deployed successfully"
69
+ ```
70
+
71
+ ### Key Design Principles for External Agents
72
+
73
+ 1. **🔄 Session-Based State**: Stateful workflow managed via solutionId
74
+ 2. **📄 Complete Data Transfer**: Solutions include all necessary schemas and mappings
75
+ 3. **🔀 Flexible Workflow**: Agents can skip optional stages as needed
76
+ 4. **🎯 Progressive Disclosure**: Stage-based configuration (required → basic → advanced → open)
77
+ 5. **🔍 Transparent Process**: All AI reasoning and schema analysis is visible
78
+
79
+ ## Current Architecture
80
+
81
+ ```
82
+ ┌────────────────────────────────────────────────────────────┐
83
+ │ DevOps AI Toolkit Core │
84
+ │ (Powered by Claude AI) │
85
+ │ │
86
+ │ ┌───────────────────────────────────────────────────────┐ │
87
+ │ │ ✅ IMPLEMENTED COMPONENTS │ │
88
+ │ │ │ │
89
+ │ │ 🔍 KubernetesDiscovery │ │
90
+ │ │ • Cluster resource discovery (CRDs + K8s) │ │
91
+ │ │ • Schema introspection with kubectl explain │ │
92
+ │ │ • Dynamic capability detection │ │
93
+ │ │ │ │
94
+ │ │ 🤖 ResourceRecommender (AI-Powered) │ │
95
+ │ │ • Two-phase analysis (selection + ranking) │ │
96
+ │ │ • Standard + CRD resource support │ │
97
+ │ │ • Context-aware solution scoring │ │
98
+ │ │ │ │
99
+ │ │ ⚡ Stage-Based Workflow Tools │ │
100
+ │ │ • Progressive question disclosure │ │
101
+ │ │ • Session state management │ │
102
+ │ │ • AI-powered manifest generation │ │
103
+ │ │ │ │
104
+ │ │ 📋 SchemaParser & ManifestValidator │ │
105
+ │ │ • kubectl explain output parsing │ │
106
+ │ │ • Dry-run manifest validation │ │
107
+ │ │ • Field constraint extraction │ │
108
+ │ └───────────────────────────────────────────────────────┘ │
109
+ │ │
110
+ │ ┌─────────────────────┐ ┌───────────────────────────┐ │
111
+ │ │ ✅ CLI Interface │ │ ✅ MCP Mode (Current) │ │
112
+ │ │ │ │ │ │
113
+ │ │ • recommend command │ │ • Stage-based workflow │ │
114
+ │ │ • discover command │ │ • Session management │ │
115
+ │ │ • Help system │ │ • Tool integration │ │
116
+ │ │ • Progress tracking │ │ • Manifest generation │ │
117
+ │ └─────────────────────┘ └───────────────────────────┘ │
118
+ └─────────────────────────────────────────────────────────────┘
119
+ ```
120
+
121
+ ## Core Principles
122
+
123
+ 1. **✅ Discovery-Driven**: Works in any cluster by discovering CRDs and core K8s resources
124
+ 2. **✅ Resource-Agnostic**: Recommends ANY available Kubernetes resources (AppClaim, CloudRun, Knative, standard K8s, etc.)
125
+ 3. **🔄 Memory-Enhanced**: (Planned) Learn from successful deployments and failures
126
+ 4. **✅ Dual Interface**: Same intelligence, multiple interaction patterns (CLI and MCP)
127
+ 5. **✅ Zero Hard-coding**: No assumptions about cluster platforms or specific CRDs
128
+ 6. **✅ AI-Powered**: Uses Claude for intelligent resource selection and manifest generation
129
+
130
+ ## Current Implementation Details
131
+
132
+ ### File Structure
133
+ ```
134
+ src/
135
+ ├── core/
136
+ │ ├── discovery.ts # ✅ KubernetesDiscovery class
137
+ │ ├── schema.ts # ✅ ResourceRecommender, SchemaParser (SolutionEnhancer moved to /src/legacy/)
138
+ │ ├── claude.ts # ✅ Claude AI integration
139
+ │ ├── index.ts # ✅ Core module exports
140
+ │ └── kubernetes-utils.ts # ✅ Shared kubectl utilities
141
+ ├── interfaces/
142
+ │ ├── cli.ts # ✅ CLI interface and commands
143
+ │ └── mcp.ts # ✅ MCP server (implemented)
144
+ ├── tools/ # ✅ MCP workflow tools
145
+ │ ├── recommend.ts # ✅ AI-powered recommendations
146
+ │ ├── choose-solution.ts # ✅ Solution selection handler
147
+ │ ├── answer-question.ts # ✅ Stage-based configuration
148
+ │ └── generate-manifests.ts # ✅ AI manifest generation
149
+ └── cli.ts # ✅ Main CLI entry point
150
+
151
+ tests/ # ✅ 565+ comprehensive tests
152
+ docs/ # ✅ Complete documentation
153
+ prompts/ # ✅ AI prompt templates
154
+ ```
155
+
156
+ ### Current Commands
157
+ ```bash
158
+ # ✅ Available now
159
+ dot-ai --help # Help system (no cluster required)
160
+ dot-ai recommend --intent "description" # AI-powered recommendations
161
+ npm run mcp:start # Start MCP server for full workflow
162
+
163
+ # ✅ MCP Tools (for interactive deployment)
164
+ # recommend({ intent: "description" }) # Get AI recommendations
165
+ # chooseSolution({ solutionId: "sol_..." }) # Select solution and get questions
166
+ # answerQuestion({ stage: "required", answers }) # Progressive configuration
167
+ # generateManifests({ solutionId: "sol_..." }) # AI-generated Kubernetes YAML
168
+
169
+ # 🔄 Planned
170
+ dot-ai deploy --manifests manifests/ # Deployment execution
171
+ ```
172
+
173
+ ## Universal Extensibility
174
+
175
+ 🔄 **The system adapts to ANY cluster configuration:**
176
+
177
+ - **Platform Clusters**: GKE with CloudRun, EKS with Lambda, AKS with ContainerApps
178
+ - **GitOps Clusters**: ArgoCD Applications, Flux HelmReleases, custom CI/CD CRDs
179
+ - **Serverless Clusters**: Knative Services, OpenFaaS Functions, Fission environments
180
+ - **Application Platforms**: DevOpsToolkit AppClaims, Crossplane Compositions, Helm Operator
181
+ - **Service Mesh**: Istio VirtualServices, Linkerd ServiceProfiles, custom mesh CRDs
182
+ - **Vanilla Kubernetes**: Standard Deployments, Services, Ingress - works everywhere
183
+ - **Custom Platforms**: Your organization's custom CRDs and abstractions
184
+
185
+ > The agent learns the schema of ANY discovered CRD through `kubectl explain` and generates appropriate manifests. No updates needed for new platforms!
186
+
187
+ ## 🔄 Governance & Guardrails (Planned - Task 9)
188
+
189
+ 🗣️ **Plain English governance - no YAML required:**
190
+
191
+ *Planned feature:* The agent will understand organizational policies written in natural language and apply them throughout the deployment process.
192
+
193
+ ### Policy Template Examples
194
+
195
+ **Security & Compliance:**
196
+ ```
197
+ # governance/security-policy.txt
198
+ Never allow privileged containers in production.
199
+ Always require security contexts with non-root users.
200
+ All images must come from gcr.io/my-company or registry.my-company.com.
201
+ Production deployments must have the labels: security.policy=restricted and compliance=sox.
202
+ ```
203
+
204
+ **Resource & Cost Controls:**
205
+ ```
206
+ # governance/resource-policy.txt
207
+ Development environments: maximum 3 replicas and 500m CPU per app.
208
+ Staging environments: maximum 10 replicas and 2 CPU cores per app.
209
+ Production environments: require approval for more than 20 replicas.
210
+ Never allow deployments that would cost more than $100/month without approval.
211
+ ```
212
+
213
+ **Platform Preferences:**
214
+ ```
215
+ # governance/platform-policy.txt
216
+ Prefer AppClaim over standard Kubernetes when available.
217
+ Never use AWS Lambda CRDs in our GCP environment.
218
+ Always use Knative for serverless workloads when available.
219
+ Require ingress capability for all web applications.
220
+ ```
221
+
222
+ **Environment Rules:**
223
+ ```
224
+ # governance/environment-policy.txt
225
+ Developers can only deploy to namespaces starting with "dev-" or "feature-".
226
+ Contractors can only deploy to the "sandbox" namespace.
227
+ Production deployments require approval from the platform team.
228
+ All applications must have monitoring enabled.
229
+ ```
230
+
231
+ ### How It Works
232
+
233
+ **1. Policy Loading:**
234
+ ```bash
235
+ # Agent reads plain English policies
236
+ dot-ai config set governance.policy-files "./governance/*.txt"
237
+ dot-ai governance validate # Checks if policies are understood
238
+ ```
239
+
240
+ **2. Runtime Application:**
241
+ The agent interprets policies contextually during each workflow step:
242
+
243
+ - **Discovery**: "Never use AWS Lambda CRDs" → filters out Lambda CRDs
244
+ - **Strategy**: "Prefer AppClaim over standard Kubernetes" → ranks AppClaim higher
245
+ - **Configuration**: "Maximum 3 replicas in development" → validates user input
246
+ - **Generation**: "Always require security contexts" → injects required fields
247
+ - **Deployment**: "Require approval for >$100/month" → triggers approval workflow
248
+
249
+ **3. Interactive Enforcement:**
250
+ ```
251
+ $ dot-ai recommend --intent "web app with 10 replicas"
252
+
253
+ 🛡️ Policy Check: Development limit is 3 replicas maximum.
254
+ Would you like to:
255
+ 1. Use 3 replicas instead (recommended)
256
+ 2. Request approval for 10 replicas
257
+ 3. Deploy to staging environment instead
258
+
259
+ Your choice [1]:
260
+ ```
261
+
262
+ ### Template System
263
+
264
+ **Starter Templates:**
265
+ ```bash
266
+ # Initialize with common templates
267
+ dot-ai governance init --template=startup
268
+ dot-ai governance init --template=enterprise
269
+ dot-ai governance init --template=regulated-industry
270
+ ```
271
+
272
+ **Custom Templates:**
273
+ ```
274
+ # governance/startup-template.txt
275
+ Keep costs low - maximum 2 replicas and 200m CPU in development.
276
+ All images must be scanned for vulnerabilities.
277
+ Prefer managed services over self-hosted when available.
278
+
279
+ # governance/enterprise-template.txt
280
+ All deployments must have cost-center and team labels.
281
+ Production requires approval from security and platform teams.
282
+ Enforce pod security standards and network policies.
283
+ Audit all deployments with compliance labels.
284
+
285
+ # governance/regulated-industry-template.txt
286
+ All containers must run as non-root with read-only file systems.
287
+ Require approval for any external network access.
288
+ All deployments must be logged and auditable.
289
+ Encrypt all data at rest and in transit.
290
+ ```
291
+
292
+ ### Benefits
293
+
294
+ ✅ **User-Friendly**: No YAML or complex syntax to learn
295
+ ✅ **Expressive**: Natural language is more flexible than rigid schemas
296
+ ✅ **Maintainable**: Easy to read, understand, and modify policies
297
+ ✅ **AI-Native**: Leverages the agent's natural language understanding
298
+ ✅ **Context-Aware**: Agent applies policies intelligently based on situation
299
+ ✅ **Progressive**: Start simple, add complexity as needed
300
+
301
+ > **Key Insight**: Since the agent is AI-powered, governance should be too. Let users express their intent in natural language, and let the AI figure out how to enforce it.
302
+
303
+ ## Mode Comparison
304
+
305
+ | Aspect | Direct Agent Mode | MCP Mode |
306
+ |--------|------------------|----------|
307
+ | **User Interaction** | Direct Q&A with user | Structured guidance to calling agent |
308
+ | **Session Management** | Built-in via Claude Code SDK | Session state carried across tool calls via `solutionId` |
309
+ | **Output Format** | Human-readable text | JSON with workflow guidance |
310
+ | **Use Case** | Standalone deployment tool | Integration with other AI agents |
311
+ | **Complexity** | Simple CLI usage | Requires MCP-aware agent |
312
+
313
+ ## Current Workflow
314
+
315
+ ### 1. ✅ Cluster Discovery (Implemented)
316
+ The KubernetesDiscovery class automatically discovers:
317
+
318
+ ```typescript
319
+ // Discover CRDs with comprehensive metadata
320
+ const crds = await discovery.discoverCRDs();
321
+
322
+ // Get all API resources
323
+ const resources = await discovery.getAPIResources();
324
+
325
+ // Analyze resource schemas
326
+ const schema = await discovery.explainResource('Deployment');
327
+
328
+ // Dynamic capability detection
329
+ const clusterOptions = await discovery.discoverClusterOptions();
330
+ // Returns: namespaces, storageClasses, ingressClasses, nodeLabels
331
+ ```
332
+
333
+ **Current Discovery Features:**
334
+ - ✅ CRD discovery with schema analysis
335
+ - ✅ Standard K8s resource enumeration
336
+ - ✅ Dynamic cluster capability detection
337
+ - ✅ Schema introspection via kubectl explain
338
+ - ✅ Namespace/storage/ingress discovery
339
+
340
+ ### 2. ✅ AI-Powered Resource Selection (Implemented)
341
+ ResourceRecommender uses two-phase AI analysis:
342
+
343
+ ```typescript
344
+ // Phase 1: AI selects promising candidates from lightweight resource list
345
+ const candidates = await recommender.selectResourceCandidates(intent, allResources);
346
+
347
+ // Phase 2: Fetch detailed schemas and rank with AI
348
+ const schemas = await recommender.fetchDetailedSchemas(candidates, explainResource);
349
+ const solutions = await recommender.rankWithDetailedSchemas(intent, schemas);
350
+ ```
351
+
352
+ **Current Resource Support:**
353
+ - ✅ **CRDs**: AppClaim, CloudRun, Knative, Crossplane, ArgoCD, custom resources
354
+ - ✅ **Standard K8s**: Deployment, Service, Ingress, HPA, Job, CronJob
355
+ - ✅ **Mixed scenarios**: AI recommends both standard + custom resources
356
+ - ✅ **Ranking**: Scores solutions based on intent match and capabilities
357
+
358
+ ### 3. ✅ Dynamic Question Generation (Implemented)
359
+ Questions are generated based on resource schemas and user intent:
360
+
361
+ ```typescript
362
+ // Generate contextual questions
363
+ const questions = await recommender.generateQuestionsWithAI(solution, intent, clusterOptions);
364
+
365
+ // Returns categorized questions:
366
+ // - required: Essential for basic functionality
367
+ // - basic: Common configuration options
368
+ // - advanced: Power user optimizations
369
+ // - open: Free-form requirement capture
370
+ ```
371
+
372
+ **Current Question Features:**
373
+ - ✅ Schema-driven question generation
374
+ - ✅ Dynamic cluster options (real namespaces, storage classes)
375
+ - ✅ Progressive disclosure (required → basic → advanced → open)
376
+ - ✅ Open-ended requirement capture
377
+ - ✅ ResourceMapping for manifest generation
378
+
379
+ ### 4. ✅ Stage-Based Configuration (Implemented)
380
+ Progressive question answering through MCP tools:
381
+
382
+ ```typescript
383
+ // Stage-based workflow through MCP tools:
384
+ // 1. answerQuestion({ stage: "required", answers: {...} })
385
+ // 2. answerQuestion({ stage: "basic", answers: {...} })
386
+ // 3. answerQuestion({ stage: "advanced", answers: {...} })
387
+ // 4. answerQuestion({ stage: "open", answers: { "open": "auto-scaling for 1000 requests/sec" } })
388
+ // 5. generateManifests({ solutionId: "sol_..." })
389
+
390
+ // Results in:
391
+ // - Session-based state management via solutionId
392
+ // - Progressive disclosure of configuration options
393
+ // - AI-generated manifests with additional resources for open requirements
394
+ ```
395
+
396
+ **Current Stage-Based Features:**
397
+ - ✅ Progressive question disclosure (required → basic → advanced → open)
398
+ - ✅ Session state management via solutionId
399
+ - ✅ Open-ended requirement processing in final stage
400
+ - ✅ AI-powered manifest generation with validation
401
+ - ✅ Support for skipping optional stages
402
+
403
+ ## 🔄 Planned and Recently Implemented Features
404
+
405
+ ### ✅ Manifest Generation (Implemented)
406
+ ```typescript
407
+ // AI-generated manifests via MCP tool
408
+ generateManifests({ solutionId: "sol_..." })
409
+
410
+ // Implemented features:
411
+ // ✅ Schema-aware AI generation (no templates)
412
+ // ✅ Dynamic resource addition based on open requirements
413
+ // ✅ kubectl dry-run validation with retry loop
414
+ // ✅ Support for any CRD type (AppClaim, Crossplane, etc.)
415
+ // ✅ Production-ready YAML output
416
+ ```
417
+
418
+ ### Deployment Engine (Task 8)
419
+ ```bash
420
+ # Deploy generated manifests with monitoring
421
+ dot-ai deploy --manifests manifests/ --watch
422
+
423
+ # Features planned:
424
+ # - kubectl apply with progress tracking
425
+ # - Resource readiness monitoring
426
+ # - Rollback capabilities
427
+ # - Success/failure learning
428
+ ```
429
+
430
+ ### Memory & Learning System (Task 4)
431
+ ```typescript
432
+ // Learn from deployment outcomes
433
+ await memory.storePattern(solution, outcome, clusterFingerprint);
434
+ await memory.storeLessons(deployment, lessons);
435
+
436
+ // Apply learned patterns
437
+ const patterns = await memory.getSimilarPatterns(currentSolution);
438
+ ```
439
+
440
+ ### ✅ MCP Server Mode (Implemented)
441
+ ```typescript
442
+ // MCP tools for external agents (stage-based workflow)
443
+ const server = new MCPServer();
444
+ server.addTool('recommend', recommendHandler);
445
+ server.addTool('chooseSolution', chooseSolutionHandler);
446
+ server.addTool('answerQuestion', answerQuestionHandler);
447
+ server.addTool('generateManifests', generateManifestsHandler);
448
+ ```
449
+
450
+ > **Note**: The system is completely extensible - it will work with ANY Kubernetes resources (CRDs or core) available in your cluster. The examples above are just common patterns.
451
+
452
+ ### 3. Configuration Gathering
453
+ **Dynamic questions based on discovered resource schemas + user requirements:**
454
+
455
+ The agent analyzes the chosen resource's schema (`kubectl explain <resource>`) and the user's description to generate contextual questions.
456
+
457
+ **Examples of dynamic questioning:**
458
+
459
+ **If AppClaim CRD is chosen:**
460
+ ```bash
461
+ kubectl explain appclaim.spec
462
+ # Discovers: image, port, host, replicas, resources fields
463
+ ```
464
+ - "What's your container image?" (required by schema)
465
+ - "What port does your app listen on?" (required by schema)
466
+ - "Do you want a custom domain or auto-generated?" (based on host field options)
467
+ - "How many replicas initially?" (optional field, asks only if user mentioned scaling)
468
+
469
+ **If CloudRunService CRD is chosen:**
470
+ ```bash
471
+ kubectl explain cloudrunservice.spec.template.spec
472
+ # Discovers: serverless-specific fields, traffic allocation, etc.
473
+ ```
474
+ - "What's your container image?" (required)
475
+ - "What's your service port?" (required)
476
+ - "Do you want traffic splitting?" (only asks if schema supports it)
477
+ - "CPU/Memory limits?" (asks based on schema constraints)
478
+
479
+ **If standard Kubernetes is chosen:**
480
+ ```bash
481
+ kubectl explain deployment.spec.template.spec.containers
482
+ kubectl explain service.spec
483
+ kubectl explain ingress.spec
484
+ ```
485
+ - "Container image?" (Deployment requirement)
486
+ - "Service type: ClusterIP, LoadBalancer, or NodePort?" (based on available ServiceTypes)
487
+ - "Need external access?" (only if the Ingress resource is available in the cluster)
488
+ - "Enable auto-scaling?" (only if the HorizontalPodAutoscaler resource is available in the cluster)
489
+
490
+ **If custom CRD `MyPlatformApp` is discovered:**
491
+ ```bash
492
+ kubectl explain myplatformapp.spec
493
+ # System learns: whatever fields exist in this custom resource
494
+ ```
495
+ - Questions generated dynamically from the schema
496
+ - User description influences which optional fields to ask about
497
+
498
+ **User Intent Influences Questions:**
499
+
500
+ If user says: `"web app with auto-scaling"` → Asks about HPA settings (if available)
501
+ If user says: `"batch job that runs nightly"` → Focuses on CronJob fields, doesn't ask about services
502
+ If user says: `"microservice with database"` → Asks about ConfigMaps, Secrets, storage
503
+ If user says: `"simple web app"` → Asks minimal questions, uses smart defaults
504
+
505
+ > **Key**: Questions are never static - they're generated by analyzing resource schemas and matching them to user intent.
506
+
507
+ ### 4. Manifest Generation
508
+ - Generate manifests using discovered CRD schemas
509
+ - Apply memory lessons (ELB→IP resolution, resource patterns, etc.)
510
+ - Validate against cluster capabilities
511
+
512
+ ### 5. Deployment & Monitoring
513
+ - Deploy resources
514
+ - Monitor until healthy/failed
515
+ - Store lessons learned for future use
516
+
517
+ ## Direct Agent Mode
518
+
519
+ The Direct Agent Mode provides a standalone CLI tool called `dot-ai` that users can install and run directly.
520
+
521
+ ### Installation & Usage
522
+ ```bash
523
+ # Install the CLI tool globally
524
+ npm install -g @vfarcic/dot-ai
525
+
526
+ # Get AI-powered deployment recommendations
527
+ dot-ai recommend --intent "web app with auto-scaling"
528
+
529
+ # Choose solution and configure step-by-step
530
+ dot-ai choose-solution --solution-id sol_xxx --session-dir ./tmp
531
+
532
+ # Generate and deploy manifests
533
+ dot-ai generate-manifests --solution-id sol_xxx --session-dir ./tmp
534
+ dot-ai deploy-manifests --solution-id sol_xxx --session-dir ./tmp
535
+
536
+ # Check status of a deployment
537
+ dot-ai status my-app
538
+ ```
539
+
540
+ > **Note**: `dot-ai` is the proposed CLI command name. Alternative names could be `kubectl-app-deploy`, `kube-app`, `k8s-dot-ai`, etc.
541
+
542
+ ### Example Interaction
543
+ ```
544
+ $ dot-ai recommend --intent "web app with auto-scaling"
545
+
546
+ 🔍 Discovering cluster capabilities...
547
+ ✅ Found: AppClaim (DevOpsToolkit), CloudRunService (GCP), Standard K8s
548
+ (Note: This varies per cluster - could be Knative, ArgoCD, or any other CRDs)
549
+
550
+ 🤔 I can deploy your web app using:
551
+ 1. AppClaim (recommended) - Managed by DevOpsToolkit
552
+ 2. CloudRunService - Serverless on GCP
553
+ 3. Standard Kubernetes - Manual configuration
554
+
555
+ Which would you prefer? [1]: 1
556
+
557
+ 📝 Using AppClaim for managed deployment.
558
+ (Questions generated from `kubectl explain appclaim.spec`)
559
+ What's your container image? nginx:latest
560
+ What port does your app listen on? [80]: 80
561
+ Do you want auto-scaling? [Y/n]: Y
562
+ Note: Different CRDs = different questions based on their schemas
563
+
564
+ 🚀 Generating AppClaim manifest...
565
+ ✅ Generated manifest with lessons applied:
566
+ - Host: my-app.12.34.56.78.nip.io (resolved ELB to IP)
567
+ - Resources: CPU 100m-500m, Memory 128Mi-512Mi (AKS baseline)
568
+
569
+ Deploy now? [Y/n]: Y
570
+
571
+ 🎯 Deploying...
572
+ ⏳ Waiting for AppClaim to create App composite...
573
+ ⏳ Waiting for pods to be ready...
574
+ ✅ Deployment successful!
575
+
576
+ 🌐 Your app is running at: https://my-app.12.34.56.78.nip.io
577
+
578
+ 💾 Stored lessons learned for future deployments.
579
+ ```
580
+
581
+ ## MCP Mode
582
+
583
+ ### MCP Functions
584
+
585
+ #### `create_application`
586
+ Entry point - discovers cluster and returns initial guidance
587
+ ```typescript
588
+ // Input: {}
589
+ // Output:
590
+ {
591
+ cluster_discovery: {
592
+ available_crds: [...],
593
+ deployment_strategies: [...]
594
+ },
595
+ agent_question: {
596
+ question: "I found AppClaim and CloudRun. Which platform?",
597
+ options: ["appclaim", "cloudrun", "kubernetes"],
598
+ reasoning: "AppClaim provides managed deployment, CloudRun is serverless",
599
+ note: "Available options vary per cluster based on discovered CRDs"
600
+ },
601
+ workflow_guidance: {
602
+ next_action: "await_user_choice",
603
+ expected_input: "platform_choice"
604
+ }
605
+ }
606
+ ```
607
+
608
+ #### `continue_workflow`
609
+ Continue based on user input - questions generated from resource schema
610
+ ```typescript
611
+ // Input: { user_choice: "appclaim", context: {...} }
612
+ // Agent runs: kubectl explain appclaim.spec
613
+ // Output:
614
+ {
615
+ progress: "platform_selected",
616
+ agent_question: {
617
+ question: "What's your container image?",
618
+ validation: "Must be valid container image format",
619
+ schema_context: "Required field in appclaim.spec.image",
620
+ why_asking: "AppClaim schema requires container image specification"
621
+ },
622
+ workflow_guidance: {
623
+ next_action: "await_user_input",
624
+ expected_input: "container_image"
625
+ }
626
+ }
627
+
628
+ // Different resource = different questions
629
+ // Input: { user_choice: "knativeservice", context: {...} }
630
+ // Agent runs: kubectl explain knativeservice.spec.template.spec.containers
631
+ // Output:
632
+ {
633
+ progress: "platform_selected",
634
+ agent_question: {
635
+ question: "What's your container image and what environment variables do you need?",
636
+ validation: "Image format: registry/image:tag, EnvVars: KEY=value pairs",
637
+ schema_context: "KnativeService requires image, envVars are common",
638
+ why_asking: "Knative schema analysis shows these are typical requirements"
639
+ },
640
+ workflow_guidance: {
641
+ next_action: "await_user_input",
642
+ expected_input: "container_config"
643
+ }
644
+ }
645
+ ```
646
+
647
+ #### `deploy_application`
648
+ Execute deployment when ready
649
+ ```typescript
650
+ // Input: { config: {...} }
651
+ // Output:
652
+ {
653
+ deployment_status: "in_progress",
654
+ deployment_id: "abc123",
655
+ monitoring_guidance: {
656
+ next_action: "poll_status",
657
+ poll_interval: "10s",
658
+ timeout: "300s"
659
+ }
660
+ }
661
+ ```
662
+
663
+ #### `get_deployment_status`
664
+ Monitor deployment progress
665
+ ```typescript
666
+ // Input: { deployment_id: "abc123" }
667
+ // Output:
668
+ {
669
+ status: "healthy" | "deploying" | "failed",
670
+ resources: [...],
671
+ access_url: "https://my-app.12.34.56.78.nip.io",
672
+ lessons_learned: [...]
673
+ }
674
+ ```
675
+
676
+ ### MCP Usage Example
677
+ ```typescript
678
+ // External agent using the MCP
679
+ const mcp = new AppManagementMCP();
680
+
681
+ // Start workflow
682
+ const initial = await mcp.create_application();
683
+ // Returns: "I found AppClaim and CloudRun. Which platform?"
684
+
685
+ // Agent asks user, gets "appclaim"
686
+ const step2 = await mcp.continue_workflow({
687
+ user_choice: "appclaim",
688
+ context: initial.context
689
+ });
690
+ // Returns: "What's your container image?"
691
+
692
+ // Continue until ready to deploy
693
+ const final = await mcp.deploy_application({
694
+ config: gatheredConfig
695
+ });
696
+ // Returns: deployment status and monitoring guidance
697
+ ```
698
+
699
+ ## Implementation Technology
700
+
701
+ ### Claude Code SDK
702
+ - **Primary Engine**: All agent intelligence powered by Claude Code SDK
703
+ - **Session Management**: Built-in conversation state handling
704
+ - **JSON Output**: Perfect for MCP structured responses
705
+ - **Multi-turn**: Handles complex deployment workflows
706
+ - **Tool Integration**: Can call kubectl, validate manifests, etc.
707
+
708
+ ### Architecture Components
709
+ - **Agent Core**: Claude Code SDK with Kubernetes system prompt
710
+ - **Mode Switch**: Runtime configuration for output format
711
+ - **CRD Discovery**: Dynamic cluster capability detection
712
+ - **Memory System**: JSON-based lesson storage and retrieval
713
+ - **Workflow Engine**: State machine for deployment process
714
+
715
+ ## File Structure
716
+ ```
717
+ dot-ai/
718
+ ├── docs/ # Documentation
719
+ │ ├── design.md # Architecture and workflow
720
+ │ ├── CONTEXT.md # Quick reference for new sessions
721
+ │ └── ORIGINAL_INSPIRATION.md # Starting prompt (reference only)
722
+ ├── src/
723
+ │ ├── agent/ # Core agent logic
724
+ │ │ ├── core.ts # Claude Code SDK wrapper
725
+ │ │ ├── discovery.ts # CRD discovery and analysis
726
+ │ │ ├── memory.ts # Lesson storage/retrieval
727
+ │ │ └── workflow.ts # Deployment state machine
728
+ │ ├── modes/ # Output mode handlers
729
+ │ │ ├── direct.ts # CLI direct interaction
730
+ │ │ └── mcp.ts # MCP server functions
731
+ │ ├── utils/ # Utilities
732
+ │ │ ├── kubectl.ts # Kubernetes API wrapper
733
+ │ │ ├── manifest.ts # Manifest generation
734
+ │ │ └── validation.ts # Schema validation
735
+ │ └── types/ # TypeScript definitions
736
+ ├── templates/ # Manifest templates (fallbacks)
737
+ ├── memory/ # Lesson storage
738
+ ├── config/ # Configuration files
739
+ └── bin/ # CLI entry points
740
+ ├── dot-ai # Direct mode CLI executable
741
+ └── dot-ai-mcp # MCP server executable
742
+ ```
743
+
744
+ ## Next Steps
745
+
746
+ 1. **Define detailed API specifications** for all MCP functions
747
+ 2. **Create implementation guide** with Claude Code SDK integration
748
+ 3. **Design memory system** for lesson storage and retrieval
749
+ 4. **Plan testing strategy** for both modes
750
+ 5. **Document deployment and configuration** requirements