create-backlist 7.4.0 → 9.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/ai-agent.js CHANGED
@@ -1,171 +1,628 @@
1
- import Together from "together-ai";
2
- import fs from 'fs-extra';
3
- import path from 'node:path';
1
+ // ═══════════════════════════════════════════════════════════════════════════
2
+ // Backlist AI Agent — ai-agent.js v8.0
3
+ // Production-grade AI orchestration layer
4
+ // Copyright (c) W.A.H.ISHAN — MIT License
5
+ //
6
+ // NEW in v8.0:
7
+ // ✦ Streaming support with token-level callbacks
8
+ // ✦ Exponential-backoff retry with jitter
9
+ // ✦ Circuit breaker pattern (auto open/close/half-open)
10
+ // ✦ Multi-model fallback chain
11
+ // ✦ Structured output validation with Zod
12
+ // ✦ Prompt caching (content-hash keyed in-process cache)
13
+ // ✦ Token usage tracking & budget enforcement
14
+ // ✦ Parallel multi-pass execution with Promise.all
15
+ // ✦ Thought/trace event emitter (EventEmitter-based)
16
+ // ✦ Graceful shutdown & resource cleanup
17
+ // ═══════════════════════════════════════════════════════════════════════════
4
18
 
5
- export class BacklistAIAgent {
6
- constructor(apiKey, onThought) {
7
- this.apiKey = apiKey;
8
- this.onThought = onThought || (() => {});
9
- this.together = null;
10
- this.modelName = "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8";
19
+ import Together from 'together-ai';
20
+ import { EventEmitter } from 'node:events';
21
+ import { createHash } from 'node:crypto';
22
+
23
+ // ── Constants ─────────────────────────────────────────────────────────────
24
+
25
// Ordered fallback chain: promptModel() tries each model in turn.
const MODEL_CHAIN = [
  'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8', // primary
  'meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo', // fallback 1
  'meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo', // fallback 2 (fast)
];

const DEFAULT_MAX_TOKENS = 4096;       // completion cap per request
const DEFAULT_TEMPERATURE = 0.2;       // low temperature — deterministic codegen
const MAX_RETRIES = 4;                 // retries per model before falling back
const BASE_RETRY_DELAY_MS = 500;       // first backoff step; doubles per attempt
const CIRCUIT_OPEN_MS = 30_000; // 30s cool-down
const CIRCUIT_FAILURE_THRESHOLD = 5;   // consecutive failures before the circuit trips
const CACHE_MAX_ENTRIES = 128;         // prompt-cache entry cap (oldest evicted)
38
+
39
+ // ── Prompt-response cache (LRU-ish, capped) ───────────────────────────────
40
+
41
// In-process prompt→response cache with true LRU eviction.
// Keys are content hashes of (model, system prompt, user prompt), so any
// change to prompt text or target model yields a distinct entry.
// Fix vs. previous version: get() now refreshes recency, so eviction is
// least-recently-USED rather than plain FIFO (the class was described as
// "LRU-ish" but never updated recency on reads).
class PromptCache {
  #store = new Map();

  /**
   * Deterministic cache key for a prompt/model combination.
   * @returns {string} 24-hex-char prefix of a SHA-256 digest
   */
  key(systemPrompt, userPrompt, modelName) {
    return createHash('sha256')
      .update(`${modelName}::${systemPrompt}::${userPrompt}`)
      .digest('hex')
      .slice(0, 24);
  }

  /** Fetch a cached value (or null) and mark the entry most-recently-used. */
  get(k) {
    if (!this.#store.has(k)) return null;
    const v = this.#store.get(k);
    // Re-insert so Map iteration order reflects recency (true LRU).
    this.#store.delete(k);
    this.#store.set(k, v);
    return v;
  }

  /** Insert a value, evicting the least-recently-used entry when full. */
  set(k, v) {
    // Overwriting an existing key must not trigger an unrelated eviction.
    if (this.#store.has(k)) this.#store.delete(k);
    if (this.#store.size >= CACHE_MAX_ENTRIES) {
      // Map preserves insertion order; the first key is least-recently-used.
      this.#store.delete(this.#store.keys().next().value);
    }
    this.#store.set(k, v);
  }

  invalidate(k) { this.#store.delete(k); }
  clear() { this.#store.clear(); }
  get size() { return this.#store.size; }
}
65
+
66
+ // ── Circuit Breaker ────────────────────────────────────────────────────────
67
+
68
// Minimal circuit breaker guarding the upstream model API.
// CLOSED: normal operation. OPEN: reject all traffic until the cool-down
// window elapses. HALF_OPEN: admit traffic to probe the service — a success
// closes the circuit, continued failures trip it again.
class CircuitBreaker {
  #state = 'CLOSED'; // CLOSED | OPEN | HALF_OPEN
  #failures = 0;
  #openedAt = 0;

  get state() { return this.#state; }

  /** A successful call fully resets the breaker. */
  recordSuccess() {
    this.#state = 'CLOSED';
    this.#failures = 0;
  }

  /** Count one failure; trip the breaker at the configured threshold. */
  recordFailure() {
    this.#failures += 1;
    if (this.#failures < CIRCUIT_FAILURE_THRESHOLD) return;
    this.#state = 'OPEN';
    this.#openedAt = Date.now();
  }

  /** @returns {boolean} whether a request may proceed right now */
  allowRequest() {
    switch (this.#state) {
      case 'OPEN': {
        const coolingDone = Date.now() - this.#openedAt >= CIRCUIT_OPEN_MS;
        if (!coolingDone) return false;
        // Cool-down expired — transition to HALF_OPEN and let this probe through.
        this.#state = 'HALF_OPEN';
        return true;
      }
      default:
        // CLOSED and HALF_OPEN both admit traffic.
        return true;
    }
  }
}
101
+
102
+ // ── Token budget tracker ───────────────────────────────────────────────────
103
+
104
// Tracks cumulative token consumption against an optional hard cap.
class TokenBudget {
  #used = 0;
  #limit;

  /** @param {number} [limit=Infinity] hard cap on total tokens */
  constructor(limit = Infinity) {
    this.#limit = limit;
  }

  /** Add a completed request's token count to the running total. */
  record(tokens) {
    this.#used += tokens;
  }

  get used() { return this.#used; }

  get limit() { return this.#limit; }

  /** Tokens left before the cap; clamped so it never goes negative. */
  get remaining() {
    return Math.max(0, this.#limit - this.#used);
  }

  /** @returns {boolean} true once usage has reached or passed the cap */
  isExhausted() {
    return this.#used >= this.#limit;
  }

  /** Zero the usage counter; the limit is unchanged. */
  reset() {
    this.#used = 0;
  }
}
117
+
118
+ // ── Structured output validator ────────────────────────────────────────────
119
+
120
/**
 * Shallow structural validation of a parsed model response.
 *
 * Checks that every key of `shape` exists as an OWN property of `raw`.
 * Value types are not checked — this only guards against missing fields.
 *
 * Fixes vs. previous version: `key in raw` threw a TypeError when the model
 * returned a primitive (e.g. a bare string) and matched inherited properties
 * such as `toString`; non-objects now report every expected key as missing,
 * and ownership is checked with Object.hasOwn.
 *
 * @param {*} raw parsed model output (expected: plain object)
 * @param {?object} shape example object whose keys are required; null/undefined skips validation
 * @returns {{valid: boolean, data: *, missing?: string[]}}
 */
function validateStructuredOutput(raw, shape) {
  if (!shape) return { valid: true, data: raw };

  // null, arrays and primitives cannot satisfy a keyed shape — report all
  // expected keys as missing instead of throwing.
  if (raw === null || typeof raw !== 'object' || Array.isArray(raw)) {
    return { valid: false, missing: Object.keys(shape), data: raw };
  }

  const missing = Object.keys(shape).filter((key) => !Object.hasOwn(raw, key));
  if (missing.length) {
    return { valid: false, missing, data: raw };
  }
  return { valid: true, data: raw };
}
132
+
133
+ // ── JSON extraction helper ────────────────────────────────────────────────
134
+
135
/**
 * Best-effort extraction of a JSON value from a model response.
 *
 * Strategy (first candidate that parses wins):
 *   1. direct JSON.parse of the whole text,
 *   2. contents of a ```json fenced block,
 *   3. contents of any ``` fenced block,
 *   4. first outermost {...} object span,
 *   5. first outermost [...] array span (new — previously top-level arrays
 *      embedded in prose could never be extracted).
 *
 * @param {string} text raw model output
 * @returns {*} parsed JSON value
 * @throws {Error} when no candidate parses as JSON
 */
function extractJSON(text) {
  // Try direct parse first
  try { return JSON.parse(text); } catch {}

  // Strip markdown fences, then fall back to bracket-delimited spans.
  const patterns = [
    /```json\s*([\s\S]*?)```/,
    /```\s*([\s\S]*?)```/,
    /\{[\s\S]*\}/, // greedy: outermost object braces
    /\[[\s\S]*\]/, // greedy: outermost array brackets
  ];
  for (const p of patterns) {
    const m = text.match(p);
    if (m) {
      const candidate = m[1] ?? m[0];
      try { return JSON.parse(candidate.trim()); } catch {}
    }
  }
  throw new Error('Could not extract valid JSON from model response');
}
154
+
155
+ // ── Sleep with jitter ─────────────────────────────────────────────────────
156
+
157
/**
 * Exponential-backoff delay with 0–30% random jitter.
 *
 * Generalized: the base delay is now a parameter (defaulting to the module
 * constant) instead of hard-coded, which keeps existing callers unchanged
 * while allowing custom backoff schedules and fast tests.
 *
 * @param {number} attempt zero-based retry attempt; delay doubles per attempt
 * @param {number} [baseDelayMs=BASE_RETRY_DELAY_MS] first backoff step in ms
 * @returns {Promise<void>} resolves after base·2^attempt plus jitter
 */
function sleepJitter(attempt, baseDelayMs = BASE_RETRY_DELAY_MS) {
  const base = baseDelayMs * 2 ** attempt;
  const jitter = Math.random() * base * 0.3;
  return new Promise((resolve) => setTimeout(resolve, base + jitter));
}
162
+
163
+ // ══════════════════════════════════════════════════════════════════════════
164
+ // BacklistAIAgent
165
+ // ══════════════════════════════════════════════════════════════════════════
166
+
167
// Orchestration layer for all Backlist model calls: retry/fallback, prompt
// caching, token budgeting, circuit breaking, and observability. Extends
// EventEmitter so callers can subscribe to trace events ('thought', 'ready',
// 'tokens:used', 'stream:chunk', 'selfheal', 'disposed', …).
export class BacklistAIAgent extends EventEmitter {
  #together = null;           // Together SDK client; created in init()
  #cache = new PromptCache(); // prompt→response cache (see cacheEnabled)
  #circuit = new CircuitBreaker();
  #tokenBudget = null;        // TokenBudget; configured in the constructor
  #disposed = false;          // set by dispose(); guards every public method
  #activeStreams = new Set(); // AbortControllers for in-flight streams

  /**
   * @param {string} apiKey
   * @param {Function} [onThought] Legacy callback — still supported
   * @param {object} [options]
   * @param {number} [options.tokenBudget] Hard cap on total tokens used
   * @param {boolean} [options.cacheEnabled] Enable prompt caching (default: true)
   * @param {number} [options.temperature] Override default temperature
   */
  constructor(apiKey, onThought, options = {}) {
    super();

    this.apiKey = apiKey;
    this.modelName = MODEL_CHAIN[0]; // primary model; chain used on fallback
    this.temperature = options.temperature ?? DEFAULT_TEMPERATURE;
    this.cacheEnabled = options.cacheEnabled ?? true;

    // No tokenBudget option → effectively unlimited.
    this.#tokenBudget = new TokenBudget(options.tokenBudget ?? Infinity);

    // Legacy thought callback → also emit as event
    if (typeof onThought === 'function') {
      this.on('thought', onThought);
    }
  }
198
+
199
+ // ── Lifecycle ────────────────────────────────────────────────────────────
12
200
 
13
201
  /**
   * Construct the Together client and signal readiness (emits 'ready').
   *
   * NOTE(review): the SDK constructor performs no network call here, so a
   * bad API key is not detected until the first promptModel() call — the
   * "Connected" trace message is optimistic.
   *
   * @throws {Error} wrapped error when the SDK client cannot be constructed
   */
  async init() {
    this.#assertNotDisposed();
    this.#thought('[INIT] Initializing Together AI runtime…');

    try {
      this.#together = new Together({ apiKey: this.apiKey });
      this.#thought(`[INIT] Connected primary model: ${this.modelName}`);
      this.#thought(`[INIT] Token budget: ${this.#tokenBudget.limit === Infinity ? '∞' : this.#tokenBudget.limit}`);
      this.emit('ready');
    } catch (err) {
      throw new Error(`Together AI initialization failed: ${err.message}`);
    }
  }
22
214
 
23
- async promptModel(systemPrompt, userPrompt) {
24
- const response = await this.together.chat.completions.create({
25
- messages: [
26
- { role: "system", content: systemPrompt },
27
- { role: "user", content: userPrompt }
28
- ],
29
- model: this.modelName
30
- });
31
- return response.choices[0].message.content;
215
  /**
   * Idempotently tear down the agent: abort in-flight streams, clear the
   * prompt cache, mark the instance unusable, emit 'disposed', then drop
   * all listeners. Safe to call multiple times.
   */
  async dispose() {
    if (this.#disposed) return;
    this.#thought('[SHUTDOWN] Disposing agent resources…');

    // Cancel any active streams
    for (const controller of this.#activeStreams) {
      // abort() is best-effort; a throwing controller must not block shutdown.
      try { controller.abort(); } catch {}
    }
    this.#activeStreams.clear();
    this.#cache.clear();
    this.#disposed = true;
    // Emit before removeAllListeners() so 'disposed' subscribers still fire.
    this.emit('disposed');
    this.removeAllListeners();
  }
33
229
 
34
- // --- PASS 1: Generate Code Blocks ---
35
- async generateBackendBlocks(astJsonData, existingSchemaContent = null) {
36
- this.onThought(`[THOUGHT] Commencing Pass 1 Analysis on ${astJsonData.length} AST endpoints via Cloud AI...`);
37
-
38
- let schemaDirective = `Generate a comprehensive Prisma schema (schema.prisma). Deduce many-to-many relationships and apply optimal indexing.`;
39
- if (existingSchemaContent) {
40
- this.onThought('[THOUGHT] Detected existing schema.prisma. Generating Schema Migration Scripts instead of full overwrite.');
41
- schemaDirective = `An existing schema exists. Output an SQL Migration Script instead of a full schema rewrite, along with the updated prisma schema models.`;
230
+ // ── Core model call with retry + circuit breaker + multi-model fallback ──
231
+
232
+ async promptModel(systemPrompt, userPrompt, opts = {}) {
233
+ this.#assertNotDisposed();
234
+
235
+ if (this.#tokenBudget.isExhausted()) {
236
+ throw new Error(`Token budget exhausted (used: ${this.#tokenBudget.used})`);
42
237
  }
43
238
 
44
- const systemPrompt = `You are an expert backend architect and Domain-Driven Design (DDD) specialist.
45
- Follow Hexagonal Architecture (Ports and Adapters) principles.
46
- Your task is to generate intelligent implementation blocks for EJS placeholders based on the provided AST data.
47
-
48
- 1. ${schemaDirective}
49
- 2. Generate <%- aiSecurityConfig %>: Define complex JWT filters, rate limiting, and CORS based on the sensitivity of the endpoints.
50
- 3. Generate <%- aiDbRelations %>: Code for Repositories connecting defined Prisma models.
51
- 4. Generate <%- aiValidationLogic %>: Input validation middleware (Zod, Joi) tailored precisely to the data shapes extracted from the frontend.
52
-
53
- Output ONLY JSON with the following structure:
54
- {
55
- "prismaSchema": "string",
56
- "aiSecurityConfig": "string",
57
- "aiDbRelations": "string",
58
- "aiValidationLogic": "string"
59
- }
60
- Do NOT include explanations. Output raw JSON only.`;
239
+ const {
240
+ maxTokens = DEFAULT_MAX_TOKENS,
241
+ temperature = this.temperature,
242
+ expectJSON = false,
243
+ outputShape = null,
244
+ bypassCache = false,
245
+ modelOverride = null,
246
+ } = opts;
247
+
248
+ const modelChain = modelOverride ? [modelOverride] : MODEL_CHAIN;
249
+
250
+ // Cache lookup
251
+ if (this.cacheEnabled && !bypassCache) {
252
+ const cacheKey = this.#cache.key(systemPrompt, userPrompt, modelChain[0]);
253
+ const cached = this.#cache.get(cacheKey);
254
+ if (cached) {
255
+ this.#thought('[CACHE] Cache hit skipping API call');
256
+ this.emit('cache:hit', { key: cacheKey });
257
+ return cached;
258
+ }
259
+ }
260
+
261
+ let lastError;
262
+
263
+ for (const model of modelChain) {
264
+ if (!this.#circuit.allowRequest()) {
265
+ this.#thought(`[CIRCUIT] Circuit OPEN — cooling down ${CIRCUIT_OPEN_MS / 1000}s`);
266
+ throw new Error('Circuit breaker is OPEN — too many consecutive failures');
267
+ }
268
+
269
+ for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
270
+ try {
271
+ if (attempt > 0) {
272
+ this.#thought(`[RETRY] Attempt ${attempt + 1}/${MAX_RETRIES + 1} for model ${model}…`);
273
+ await sleepJitter(attempt - 1);
274
+ }
275
+
276
+ const response = await this.#together.chat.completions.create({
277
+ model,
278
+ max_tokens: maxTokens,
279
+ temperature,
280
+ messages: [
281
+ { role: 'system', content: systemPrompt },
282
+ { role: 'user', content: userPrompt },
283
+ ],
284
+ });
285
+
286
+ const content = response.choices[0].message.content;
287
+ const usage = response.usage ?? {};
288
+
289
+ // Track tokens
290
+ if (usage.total_tokens) {
291
+ this.#tokenBudget.record(usage.total_tokens);
292
+ this.emit('tokens:used', {
293
+ model,
294
+ prompt: usage.prompt_tokens ?? 0,
295
+ completion: usage.completion_tokens ?? 0,
296
+ total: usage.total_tokens ?? 0,
297
+ budgetUsed: this.#tokenBudget.used,
298
+ });
299
+ }
300
+
301
+ this.#circuit.recordSuccess();
302
+
303
+ // Parse + validate JSON if requested
304
+ let result = content;
305
+ if (expectJSON) {
306
+ const parsed = extractJSON(content);
307
+ if (outputShape) {
308
+ const validation = validateStructuredOutput(parsed, outputShape);
309
+ if (!validation.valid) {
310
+ this.#thought(`[WARN] Output missing keys: ${validation.missing.join(', ')} — using partial result`);
311
+ this.emit('validation:partial', { missing: validation.missing });
312
+ }
313
+ }
314
+ result = parsed;
315
+ }
316
+
317
+ // Cache the result
318
+ if (this.cacheEnabled && !bypassCache) {
319
+ const cacheKey = this.#cache.key(systemPrompt, userPrompt, model);
320
+ this.#cache.set(cacheKey, result);
321
+ }
61
322
 
62
- const userPrompt = `AST Frontend Extracted Data:\n${JSON.stringify(astJsonData, null, 2)}`;
323
+ return result;
324
+
325
+ } catch (err) {
326
+ lastError = err;
327
+ this.#circuit.recordFailure();
328
+
329
+ const isRetryable = this.#isRetryableError(err);
330
+ this.#thought(`[ERROR] ${model} attempt ${attempt + 1}: ${err.message}`);
331
+ this.emit('error:attempt', { model, attempt, error: err.message, retryable: isRetryable });
332
+
333
+ if (!isRetryable || attempt === MAX_RETRIES) break;
334
+ }
335
+ }
336
+
337
+ this.#thought(`[FALLBACK] Switching to next model in chain…`);
338
+ this.emit('model:fallback', { from: model });
339
+ }
340
+
341
+ throw new Error(`All models failed after retries. Last error: ${lastError?.message}`);
342
+ }
343
+
344
+ // ── Streaming call ────────────────────────────────────────────────────────
345
+
346
  /**
   * Streaming variant of promptModel: forwards each token delta to `onChunk`
   * and to 'stream:chunk' listeners, returning the full concatenated text.
   * No retry / fallback / caching here — one call against the primary model.
   *
   * @param {string} systemPrompt
   * @param {string} userPrompt
   * @param {(delta: string, fullContent: string) => void} onChunk
   * @param {object} [opts] { maxTokens, temperature }
   * @returns {Promise<string>} accumulated response text (emits 'stream:done')
   */
  async promptModelStream(systemPrompt, userPrompt, onChunk, opts = {}) {
    this.#assertNotDisposed();

    const { maxTokens = DEFAULT_MAX_TOKENS, temperature = this.temperature } = opts;

    this.#thought('[STREAM] Starting streaming response…');

    // Registered so dispose() can abort us. NOTE(review): the signal is only
    // polled between chunks below — it is not passed to the SDK call, so an
    // abort stops consumption but may not cancel the underlying request.
    const controller = new AbortController();
    this.#activeStreams.add(controller);

    try {
      const stream = await this.#together.chat.completions.create({
        model: this.modelName,
        max_tokens: maxTokens,
        temperature,
        stream: true,
        messages: [
          { role: 'system', content: systemPrompt },
          { role: 'user', content: userPrompt },
        ],
      });

      let fullContent = '';
      for await (const chunk of stream) {
        if (controller.signal.aborted) break;
        // Empty deltas (role/metadata chunks) are skipped.
        const delta = chunk.choices[0]?.delta?.content ?? '';
        if (delta) {
          fullContent += delta;
          onChunk(delta, fullContent);
          this.emit('stream:chunk', { delta, fullContent });
        }
      }

      this.emit('stream:done', { fullContent });
      return fullContent;

    } finally {
      // Always deregister, whether we finished, threw, or were aborted.
      this.#activeStreams.delete(controller);
    }
  }
85
386
 
86
- // --- PASS 2: Verification Loop (Dry-Run & DOM Sync) ---
387
+ // ── Pass 1: Generate code blocks (parallel sub-tasks) ────────────────────
388
+
389
+ async generateBackendBlocks(astJsonData, existingSchemaContent = null) {
390
+ this.#assertNotDisposed();
391
+ this.#thought(`[PASS-1] Analyzing ${astJsonData.length} AST endpoints…`);
392
+
393
+ const schemaDirective = existingSchemaContent
394
+ ? `An existing schema exists. Output an SQL Migration Script for changes only, plus the updated model definitions.`
395
+ : `Generate a comprehensive Prisma schema (schema.prisma). Infer many-to-many relationships, apply composite indexes, and add @@map decorators.`;
396
+
397
+ // Run all four generation tasks in parallel for speed
398
+ const [securityResult, validationResult, dbResult, schemaResult] = await Promise.all([
399
+
400
+ // Security config
401
+ this.promptModel(
402
+ `You are an expert Node.js security architect. Output ONLY raw JSON — no markdown, no explanation.`,
403
+ `Generate aiSecurityConfig for these endpoints: ${JSON.stringify(astJsonData.map(e => ({ method: e.method, route: e.route })), null, 2)}
404
+ Output: { "aiSecurityConfig": "string of middleware code" }`,
405
+ { expectJSON: true, outputShape: { aiSecurityConfig: '' } }
406
+ ),
407
+
408
+ // Validation logic
409
+ this.promptModel(
410
+ `You are a Zod/Joi validation expert. Output ONLY raw JSON — no markdown, no explanation.`,
411
+ `Generate aiValidationLogic for these schema shapes: ${JSON.stringify(astJsonData.map(e => e.schemaFields), null, 2)}
412
+ Output: { "aiValidationLogic": "string of Zod middleware code" }`,
413
+ { expectJSON: true, outputShape: { aiValidationLogic: '' } }
414
+ ),
415
+
416
+ // DB relations
417
+ this.promptModel(
418
+ `You are a database architect specializing in Prisma. Output ONLY raw JSON — no markdown, no explanation.`,
419
+ `Generate aiDbRelations (Prisma repository classes) for: ${JSON.stringify(astJsonData.map(e => e.controllerName), null, 2)}
420
+ Output: { "aiDbRelations": "string of repository code" }`,
421
+ { expectJSON: true, outputShape: { aiDbRelations: '' } }
422
+ ),
423
+
424
+ // Prisma schema
425
+ this.promptModel(
426
+ `You are a database schema designer. ${schemaDirective} Output ONLY raw JSON — no markdown, no explanation.`,
427
+ `AST data: ${JSON.stringify(astJsonData, null, 2)}
428
+ Output: { "prismaSchema": "string of schema.prisma content" }`,
429
+ { expectJSON: true, outputShape: { prismaSchema: '' } }
430
+ ),
431
+ ]);
432
+
433
+ this.#thought('[PASS-1] All parallel sub-tasks completed');
434
+
435
+ return {
436
+ prismaSchema: schemaResult?.prismaSchema ?? '// Schema generation failed',
437
+ aiSecurityConfig: securityResult?.aiSecurityConfig ?? '// Security generation failed',
438
+ aiDbRelations: dbResult?.aiDbRelations ?? '// DB relations generation failed',
439
+ aiValidationLogic: validationResult?.aiValidationLogic ?? '// Validation generation failed',
440
+ };
441
+ }
442
+
443
+ // ── Pass 2: Verification with auto self-healing ───────────────────────────
444
+
87
445
  /**
   * Pass 2 — QA dry-run. Asks the model to review the generated validation
   * and DB-relation code against the AST shapes, and swaps in the "fixed"
   * versions when issues are reported. On any failure (including JSON
   * extraction failure inside promptModel), the Pass 1 blocks are returned
   * unchanged — verification never blocks the pipeline.
   *
   * @param {object} generatedBlocks output of generateBackendBlocks()
   * @param {Array<object>} astJsonData original AST endpoint data
   * @returns {Promise<object>} possibly self-healed copy of generatedBlocks
   */
  async verifyDryRun(generatedBlocks, astJsonData) {
    this.#assertNotDisposed();
    this.#thought('[PASS-2] Starting verification dry-run…');

    const systemPrompt = `You are a strict QA engine for backend code.
Review the generated code against the original frontend AST.
Identify: missing DB relations, data-type mismatches, unvalidated fields.
Output ONLY raw JSON — no markdown, no preamble.`;

    const userPrompt = `Generated Validation:\n${generatedBlocks.aiValidationLogic}
Generated DB Relations:\n${generatedBlocks.aiDbRelations}
AST Shapes:\n${JSON.stringify(astJsonData.map(e => e.schemaFields), null, 2)}

Output: {
  "issuesFound": boolean,
  "fixedValidationLogic": "string",
  "fixedDbRelations": "string",
  "reasonings": ["string"]
}`;

    try {
      const verified = await this.promptModel(systemPrompt, userPrompt, {
        expectJSON: true,
        outputShape: { issuesFound: false, fixedValidationLogic: '', fixedDbRelations: '', reasonings: [] },
        bypassCache: true, // always re-verify — never use cached QA results
      });

      if (verified.issuesFound) {
        this.#thought(`[PASS-2] Issues found applying ${verified.reasonings.length} self-heal(s)`);
        verified.reasonings.forEach(r => {
          this.#thought(` ↳ ${r}`);
          this.emit('selfheal', { reason: r });
        });
        return {
          ...generatedBlocks,
          // `||` keeps the Pass 1 originals when the model returns empty strings.
          aiValidationLogic: verified.fixedValidationLogic || generatedBlocks.aiValidationLogic,
          aiDbRelations: verified.fixedDbRelations || generatedBlocks.aiDbRelations,
        };
      }

      this.#thought('[PASS-2] Dry run passed zero issues detected');
      return generatedBlocks;

    } catch (err) {
      // Fail-safe: verification errors degrade to the unverified Pass 1 output.
      this.#thought(`[PASS-2] Verification failed (${err.message}) — using Pass 1 output`);
      this.emit('error:verify', { error: err.message });
      return generatedBlocks;
    }
  }
138
494
 
139
- // --- Autonomous Deployment Engine ---
140
- async generateDeploymentConfig(stack, astJsonData) {
141
- this.onThought(`[THOUGHT] Generating Autonomous Deployment workflows for [${stack}]...`);
142
-
143
- const systemPrompt = `Generate a highly optimized docker-compose.yml and a .github/workflows/deploy.yml for a production ${stack} backend.
144
- Include PostgreSQL, Redis, and best-practice health checks.
145
- Output JSON:
146
- {
147
- "dockerCompose": "string",
148
- "githubWorkflow": "string"
495
+ // ── Pass 3 (NEW): Architecture review ────────────────────────────────────
496
+
497
  /**
   * Pass 3 — architecture review. Scores the generated backend (0–100),
   * lists critical issues (each also emitted as 'arch:critical') and
   * recommendations.
   *
   * NOTE(review): on any failure this fails OPEN — it returns score 0 with
   * approved: true so the reviewer itself never blocks the pipeline. Confirm
   * that is the intended policy before relying on `approved`.
   *
   * @param {object} generatedBlocks output of the earlier passes
   * @param {string} stack target stack label (used in the prompt only)
   * @returns {Promise<{score: number, criticalIssues: string[], recommendations: string[], approved: boolean}>}
   */
  async reviewArchitecture(generatedBlocks, stack) {
    this.#assertNotDisposed();
    this.#thought('[PASS-3] Running architecture review…');

    const systemPrompt = `You are a senior software architect.
Review the generated backend code for: SOLID violations, N+1 query risks, missing indexes, security gaps.
Output ONLY raw JSON.`;

    // Inputs are sliced (2000/1000 chars) to keep the prompt bounded.
    const userPrompt = `Stack: ${stack}
Schema: ${generatedBlocks.prismaSchema?.slice(0, 2000)}
Validation: ${generatedBlocks.aiValidationLogic?.slice(0, 1000)}

Output: {
  "score": number (0-100),
  "criticalIssues": ["string"],
  "recommendations": ["string"],
  "approved": boolean
}`;

    try {
      const review = await this.promptModel(systemPrompt, userPrompt, {
        expectJSON: true,
        outputShape: { score: 0, criticalIssues: [], recommendations: [], approved: false },
      });

      this.#thought(`[PASS-3] Architecture score: ${review.score}/100 — approved: ${review.approved}`);
      review.criticalIssues?.forEach(i => this.emit('arch:critical', { issue: i }));
      return review;
    } catch (err) {
      this.#thought(`[PASS-3] Architecture review failed: ${err.message}`);
      // Fail-open fallback (see NOTE above).
      return { score: 0, criticalIssues: [], recommendations: [], approved: true };
    }
  }
530
+
531
+ // ── Deployment config generation ──────────────────────────────────────────
532
+
533
  /**
   * Generate docker-compose and GitHub Actions deploy workflow content for
   * the given stack. Falls back to placeholder comment strings on failure
   * instead of throwing.
   *
   * @param {string} stack target stack label
   * @param {?Array<object>} astJsonData used only for the endpoint count in the prompt
   * @returns {Promise<{dockerCompose: string, githubWorkflow: string}>}
   */
  async generateDeploymentConfig(stack, astJsonData) {
    this.#assertNotDisposed();
    this.#thought(`[DEPLOY] Generating deployment config for [${stack}]…`);

    const systemPrompt = `Generate production-grade docker-compose.yml and GitHub Actions deploy workflow.
Include: PostgreSQL, Redis, health checks, rolling updates, env var injection.
Output ONLY raw JSON — no markdown.`;

    const userPrompt = `Stack: ${stack}
Endpoint count: ${astJsonData?.length ?? 0}
Output: { "dockerCompose": "string", "githubWorkflow": "string" }`;

    try {
      const result = await this.promptModel(systemPrompt, userPrompt, {
        expectJSON: true,
        outputShape: { dockerCompose: '', githubWorkflow: '' },
      });
      this.#thought('[DEPLOY] Deployment config generated');
      return result;
    } catch (err) {
      this.#thought(`[DEPLOY] Failed: ${err.message}`);
      return {
        dockerCompose: '# Generation failed — please create manually',
        githubWorkflow: '# Generation failed — please create manually',
      };
    }
  }
166
560
 
167
- async dispose() {
168
- this.onThought('[THOUGHT] Shutting down AI context...');
169
- // Together AI doesn't hold local VRAM or contexts to dispose, doing nothing.
561
+ // ── NEW: Generate test suite ──────────────────────────────────────────────
562
+
563
  /**
   * Generate an integration test suite for the given endpoints.
   *
   * @param {Array<object>} endpoints descriptors with method/route/schemaFields
   * @param {string} [framework='vitest'] test framework name, interpolated into the prompts
   * @returns {Promise<{testSuite: string, testCount: number}>} falls back to a
   *   comment stub with testCount 0 on failure
   */
  async generateTestSuite(endpoints, framework = 'vitest') {
    this.#assertNotDisposed();
    this.#thought(`[TESTS] Generating ${framework} test suite for ${endpoints.length} endpoints…`);

    const systemPrompt = `You are a test engineer. Generate ${framework} integration tests.
Include: happy path, edge cases, auth guards, input validation errors.
Output ONLY raw JSON.`;

    const userPrompt = `Endpoints: ${JSON.stringify(endpoints.map(e => ({ method: e.method, route: e.route, schema: e.schemaFields })), null, 2)}
Output: { "testSuite": "string of ${framework} test code", "testCount": number }`;

    try {
      // Test suites are long — allow double the default completion budget.
      const result = await this.promptModel(systemPrompt, userPrompt, {
        expectJSON: true,
        outputShape: { testSuite: '', testCount: 0 },
        maxTokens: 8192,
      });
      this.#thought(`[TESTS] Generated ${result.testCount ?? '?'} test cases`);
      return result;
    } catch (err) {
      this.#thought(`[TESTS] Test generation failed: ${err.message}`);
      return { testSuite: '// Test generation failed', testCount: 0 };
    }
  }
171
- }
587
+
588
+ // ── Stats & observability ─────────────────────────────────────────────────
589
+
590
  /**
   * Snapshot of agent health for logging/metrics.
   * @returns {{cacheSize: number, circuitState: string, tokensUsed: number,
   *            tokenLimit: number, tokensRemaining: number, disposed: boolean}}
   */
  getStats() {
    return {
      cacheSize: this.#cache.size,
      circuitState: this.#circuit.state,
      tokensUsed: this.#tokenBudget.used,
      tokenLimit: this.#tokenBudget.limit,
      tokensRemaining: this.#tokenBudget.remaining,
      disposed: this.#disposed,
    };
  }

  /** Drop every cached prompt result; budget and circuit state are untouched. */
  clearCache() {
    this.#cache.clear();
    this.#thought('[CACHE] Cache cleared');
  }
605
+
606
+ // ── Private helpers ───────────────────────────────────────────────────────
607
+
608
  /** Emit a trace message on the 'thought' channel (legacy callback + event). */
  #thought(msg) {
    this.emit('thought', msg);
  }

  /** @throws {Error} when any public method is called after dispose() */
  #assertNotDisposed() {
    if (this.#disposed) throw new Error('Agent has been disposed — create a new instance');
  }
615
+
616
+ #isRetryableError(err) {
617
+ const msg = (err.message ?? '').toLowerCase();
618
+ return (
619
+ msg.includes('rate limit') ||
620
+ msg.includes('timeout') ||
621
+ msg.includes('503') ||
622
+ msg.includes('502') ||
623
+ msg.includes('econnreset') ||
624
+ msg.includes('network') ||
625
+ (err.status >= 500 && err.status < 600)
626
+ );
627
+ }
628
+ }