npm - forgedev - Versions diffs - 1.2.0 → 1.3.0 - Mend

forgedev 1.2.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (171) hide show

package/templates/claude-code/hooks/scripts/code-hygiene.mjs ADDED Viewed

@@ -0,0 +1,293 @@
+#!/usr/bin/env node
+// Code Hygiene Gate — runs on Stop to enforce structural quality
+// Checks: file length, function length, duplicate code blocks, directory bloat, stale test files
+// Exit 0 = pass (with warnings), Exit 2 = blocked (critical issues found)
+import { readFileSync, readdirSync, statSync, existsSync } from 'node:fs';
+import { join, relative, extname, basename } from 'node:path';
+// ── Config ──────────────────────────────────────────────────────────────────
+const MAX_FILE_LINES = 300;
+const MAX_FUNCTION_LINES = 50;
+const MIN_DUPLICATE_LINES = 6;       // minimum consecutive matching lines to flag
+const MAX_FILES_PER_DIR = 20;        // warn if a single directory has too many files
+const SOURCE_EXTENSIONS = new Set(['.ts', '.tsx', '.js', '.jsx', '.py', '.go', '.rs']);
+const IGNORE_DIRS = new Set(['node_modules', '.next', '__pycache__', '.git', 'dist', 'build', '.claude', 'coverage', '.venv', 'venv']);
+const IGNORE_FILES = new Set(['package-lock.json', 'pnpm-lock.yaml', 'yarn.lock']);
+// ── Helpers ─────────────────────────────────────────────────────────────────
+function walk(dir, files = []) {
+  let entries;
+  try { entries = readdirSync(dir, { withFileTypes: true }); } catch { return files; }
+  for (const entry of entries) {
+    if (IGNORE_DIRS.has(entry.name)) continue;
+    const full = join(dir, entry.name);
+    if (entry.isDirectory()) {
+      walk(full, files);
+    } else if (SOURCE_EXTENSIONS.has(extname(entry.name)) && !IGNORE_FILES.has(entry.name)) {
+      files.push(full);
+    }
+  }
+  return files;
+}
+function readLines(filePath) {
+  try { return readFileSync(filePath, 'utf-8').split('\n'); } catch { return []; }
+}
+// ── Checks ──────────────────────────────────────────────────────────────────
+function checkFileLengths(files, cwd) {
+  const warnings = [];
+  for (const file of files) {
+    const lines = readLines(file);
+    if (lines.length > MAX_FILE_LINES) {
+      warnings.push({
+        level: lines.length > MAX_FILE_LINES * 2 ? 'critical' : 'warning',
+        file: relative(cwd, file),
+        message: `${lines.length} lines (limit: ${MAX_FILE_LINES}). Split into smaller, focused modules.`,
+      });
+    }
+  }
+  return warnings;
+}
+function checkFunctionLengths(files, cwd) {
+  const warnings = [];
+  // Match common function declarations across JS/TS/Python
+  const fnPatterns = [
+    /^(?:export\s+)?(?:async\s+)?function\s+(\w+)/,          // function foo()
+    /^(?:export\s+)?(?:const|let)\s+(\w+)\s*=\s*(?:async\s*)?\(/,  // const foo = (
+    /^(?:export\s+)?(?:const|let)\s+(\w+)\s*=\s*(?:async\s*)?(?:\([^)]*\)|[^=])\s*=>/,  // const foo = () =>
+    /^\s+(?:async\s+)?(\w+)\s*\([^)]*\)\s*\{/,               // method() {
+    /^(?:async\s+)?def\s+(\w+)/,                               // def foo (Python)
+  ];
+  for (const file of files) {
+    const fileExt = extname(file);
+    if (fileExt === '.py') continue;
+    const lines = readLines(file);
+    let currentFn = null;
+    let fnStart = 0;
+    let braceDepth = 0;
+    let inFunction = false;
+    for (let i = 0; i < lines.length; i++) {
+      const line = lines[i];
+      // Check if this line starts a new function
+      let newFnStarted = false;
+      for (const pattern of fnPatterns) {
+        const match = line.match(pattern);
+        if (match) {
+          // Close previous function if open
+          if (inFunction && currentFn) {
+            const len = i - fnStart;
+            if (len > MAX_FUNCTION_LINES) {
+              warnings.push({
+                level: 'warning',
+                file: relative(cwd, file),
+                message: `Function "${currentFn}" is ${len} lines (limit: ${MAX_FUNCTION_LINES}) at line ${fnStart + 1}. Extract helper functions.`,
+              });
+            }
+          }
+          currentFn = match[1];
+          fnStart = i;
+          inFunction = true;
+          braceDepth = 0;
+          newFnStarted = true;
+          break;
+        }
+      }
+      // Skip brace tracking on the line that started a new function
+      if (newFnStarted) continue;
+      // Track brace depth for JS/TS
+      if (inFunction) {
+        for (const ch of line) {
+          if (ch === '{') braceDepth++;
+          if (ch === '}') braceDepth--;
+        }
+        if (braceDepth <= 0 && i > fnStart && line.trim()) {
+          const len = i - fnStart + 1;
+          if (len > MAX_FUNCTION_LINES) {
+            warnings.push({
+              level: 'warning',
+              file: relative(cwd, file),
+              message: `Function "${currentFn}" is ${len} lines (limit: ${MAX_FUNCTION_LINES}) at line ${fnStart + 1}. Extract helper functions.`,
+            });
+          }
+          inFunction = false;
+          currentFn = null;
+        }
+      }
+    }
+  }
+  return warnings;
+}
+function checkDuplicateBlocks(files, cwd) {
+  const warnings = [];
+  // Build a map of normalized line sequences -> locations
+  const blockMap = new Map();
+  for (const file of files) {
+    const lines = readLines(file).map(l => l.trim()).filter(l => l && !l.startsWith('//') && !l.startsWith('#') && !l.startsWith('*') && !l.startsWith('import') && !l.startsWith('from'));
+    // Sliding window of MIN_DUPLICATE_LINES
+    for (let i = 0; i <= lines.length - MIN_DUPLICATE_LINES; i++) {
+      const block = lines.slice(i, i + MIN_DUPLICATE_LINES).join('\n');
+      // Skip trivial blocks (mostly braces, returns, empty patterns)
+      if (block.replace(/[{}\s();\n]/g, '').length < 30) continue;
+      if (!blockMap.has(block)) {
+        blockMap.set(block, []);
+      }
+      blockMap.get(block).push({ file: relative(cwd, file), line: i + 1 });
+    }
+  }
+  // Report blocks found in multiple files
+  const reported = new Set();
+  for (const [block, locations] of blockMap) {
+    const uniqueFiles = [...new Set(locations.map(l => l.file))];
+    if (uniqueFiles.length < 2) continue;
+    const key = uniqueFiles.sort().join('|');
+    if (reported.has(key)) continue;
+    reported.add(key);
+    const preview = block.split('\n')[0].substring(0, 60);
+    warnings.push({
+      level: 'warning',
+      file: uniqueFiles[0],
+      message: `Duplicate code block found in ${uniqueFiles.length} files: "${preview}..." Also in: ${uniqueFiles.slice(1).join(', ')}. Extract to a shared utility.`,
+    });
+    // Limit duplicate reports to avoid noise
+    if (warnings.length > 5) break;
+  }
+  return warnings;
+}
+function checkDirectoryBloat(cwd) {
+  const warnings = [];
+  function checkDir(dir) {
+    let entries;
+    try { entries = readdirSync(dir, { withFileTypes: true }); } catch { return; }
+    const sourceFiles = entries.filter(e =>
+      !e.isDirectory() && SOURCE_EXTENSIONS.has(extname(e.name)) && !IGNORE_FILES.has(e.name)
+    );
+    if (sourceFiles.length > MAX_FILES_PER_DIR) {
+      warnings.push({
+        level: 'warning',
+        file: relative(cwd, dir) || '.',
+        message: `Directory has ${sourceFiles.length} source files (limit: ${MAX_FILES_PER_DIR}). Consider grouping into subdirectories by feature or domain.`,
+      });
+    }
+    for (const entry of entries) {
+      if (entry.isDirectory() && !IGNORE_DIRS.has(entry.name)) {
+        checkDir(join(dir, entry.name));
+      }
+    }
+  }
+  checkDir(cwd);
+  return warnings;
+}
+function checkStaleTests(files, cwd) {
+  const warnings = [];
+  const sourceFiles = new Set(
+    files
+      .filter(f => !basename(f).includes('.test.') && !basename(f).includes('.spec.') && !f.includes('__tests__'))
+      .map(f => basename(f).replace(extname(f), ''))
+  );
+  const testFiles = files.filter(f =>
+    basename(f).includes('.test.') || basename(f).includes('.spec.') || f.includes('__tests__')
+  );
+  for (const testFile of testFiles) {
+    const testBase = basename(testFile)
+      .replace('.test', '')
+      .replace('.spec', '')
+      .replace(extname(testFile), '');
+    // If the source file this test corresponds to doesn't exist, flag it
+    if (testBase && !sourceFiles.has(testBase)) {
+      const lines = readLines(testFile);
+      // Check if the test file imports something that doesn't exist
+      const imports = lines.filter(l => l.includes('import') && l.includes('from'));
+      let hasDeadImport = false;
+      for (const imp of imports) {
+        const match = imp.match(/from\s+['"]([^'"]+)['"]/);
+        if (match && match[1].startsWith('.')) {
+          // Relative import - check if the file exists
+          const importPath = join(testFile, '..', match[1]);
+          const extensions = ['.ts', '.tsx', '.js', '.jsx', '.py', ''];
+          const exists = extensions.some(ext => existsSync(importPath + ext) || existsSync(importPath));
+          if (!exists) hasDeadImport = true;
+        }
+      }
+      if (hasDeadImport) {
+        warnings.push({
+          level: 'warning',
+          file: relative(cwd, testFile),
+          message: `Test file may be stale — imports reference files that no longer exist. Review and delete if no longer needed.`,
+        });
+      }
+    }
+  }
+  return warnings;
+}
+// ── Main ────────────────────────────────────────────────────────────────────
+const cwd = process.cwd();
+const files = walk(cwd);
+const allWarnings = [
+  ...checkFileLengths(files, cwd),
+  ...checkFunctionLengths(files, cwd),
+  ...checkDuplicateBlocks(files, cwd),
+  ...checkDirectoryBloat(cwd),
+  ...checkStaleTests(files, cwd),
+];
+const criticals = allWarnings.filter(w => w.level === 'critical');
+const warnings = allWarnings.filter(w => w.level === 'warning');
+if (allWarnings.length === 0) {
+  process.stderr.write('\n[code-hygiene] All clean. No structural issues found.\n');
+  process.exit(0);
+}
+process.stderr.write('\n[code-hygiene] Structural quality report:\n\n');
+for (const w of criticals) {
+  process.stderr.write(`  CRITICAL  ${w.file}\n    ${w.message}\n\n`);
+}
+for (const w of warnings) {
+  process.stderr.write(`  WARNING   ${w.file}\n    ${w.message}\n\n`);
+}
+process.stderr.write(`  Summary: ${criticals.length} critical, ${warnings.length} warnings\n\n`);
+if (criticals.length > 0) {
+  process.stderr.write('[code-hygiene] BLOCKED: Fix critical issues before completing.\n');
+  process.stderr.write('  Tip: Use /simplify to auto-refactor long files and extract shared utilities.\n');
+  process.exit(2);
+}
+process.stderr.write('[code-hygiene] Passed with warnings. Consider running /simplify to clean up.\n');
+process.exit(0);

package/templates/claude-code/hooks/scripts/pre-commit-gate.mjs ADDED Viewed

@@ -0,0 +1,207 @@
+#!/usr/bin/env node
+/**
+ * Pre-commit gate — the single quality checkpoint before any commit.
+ * Replaces /done. Triggered by PreToolUse hook on Bash when command contains "git commit".
+ *
+ * Phase 1 (automated, fast):
+ *   - No .env files or secrets staged
+ *   - No merge conflict markers
+ *   - Lint passes
+ *   - Tests pass
+ *
+ * Phase 2 (agent review):
+ *   - Tells Claude to run code-quality-reviewer and security-reviewer on changed files
+ *   - Only triggers if Phase 1 passes and changed files haven't been reviewed yet
+ *
+ * Exit 0 = allow commit
+ * Exit 2 = block commit with message
+ */
+import { execSync } from 'node:child_process';
+import fs from 'node:fs';
+import path from 'node:path';
+const input = process.env.CLAUDE_TOOL_INPUT || '{}';
+let parsed;
+try {
+  parsed = JSON.parse(input);
+} catch {
+  process.exit(0);
+}
+const command = parsed.command || '';
+// Only gate actual git commit commands
+if (!command.match(/git\s+commit/)) {
+  process.exit(0);
+}
+// Skip amend-only or empty commits
+if (command.includes('--allow-empty')) {
+  process.exit(0);
+}
+const errors = [];
+// Get staged files once, reuse across checks
+let stagedFiles = '';
+try {
+  stagedFiles = execSync('git diff --cached --name-only', { encoding: 'utf-8' }).trim();
+} catch {
+  // git not available
+}
+// === PHASE 1: Automated checks ===
+// Check 1: No secrets staged
+if (stagedFiles) {
+  const secretFilePatterns = ['.env', 'credentials', '.pem', '.key'];
+  const secretNamePatterns = ['secret'];
+  const flagged = stagedFiles.split('\n').filter(f => {
+    const base = path.basename(f).toLowerCase();
+    if (f.includes('.chainproof/keys/public.pem')) return false;
+    // Always flag secret file extensions regardless of language
+    if (secretFilePatterns.some(p => base.includes(p))) return true;
+    return secretNamePatterns.some(p => base.includes(p));
+  });
+  if (flagged.length > 0) {
+    errors.push(`Potential secrets staged: ${flagged.join(', ')}\nUnstage these files or confirm they are safe.`);
+  }
+}
+// Check 2: No merge conflict markers
+try {
+  const staged = execSync('git diff --cached', { encoding: 'utf-8' });
+  if (staged.includes('<<<<<<<') || staged.includes('>>>>>>>')) {
+    errors.push('Merge conflict markers found in staged changes. Resolve conflicts first.');
+  }
+} catch {
+  // skip
+}
+// Helper: run a shell command, push to errors on failure
+function tryExec(cmd, label, timeout = 30000) {
+  try {
+    execSync(`${cmd} 2>&1`, { encoding: 'utf-8', timeout });
+    return true;
+  } catch (e) {
+    if (e.status || e.killed) {
+      const msg = e.killed ? 'Command timed out' : (e.stdout || e.message);
+      errors.push(`${label}:\n${String(msg).slice(0, 500)}`);
+    }
+    return false;
+  }
+}
+function readPkg(file = 'package.json') {
+  try {
+    return JSON.parse(fs.readFileSync(file, 'utf-8'));
+  } catch {
+    return null;
+  }
+}
+// Check 3: Lint
+function runLint() {
+  if (fs.existsSync('package.json')) {
+    const pkg = readPkg();
+    if (!pkg) { errors.push('Lint skipped: package.json is malformed JSON'); return; }
+    if (pkg.scripts?.lint) { tryExec('npm run lint', 'Lint failed'); return; }
+    tryExec('npx eslint . --ext .js,.ts,.tsx', 'Lint failed');
+    return;
+  }
+  if (fs.existsSync('requirements.txt') || fs.existsSync('pyproject.toml')) {
+    tryExec('ruff check .', 'Lint failed');
+    return;
+  }
+  if (fs.existsSync('frontend/package.json')) {
+    tryExec('cd frontend && npx eslint .', 'Frontend lint failed');
+  }
+  if (fs.existsSync('backend/requirements.txt')) {
+    tryExec('cd backend && ruff check .', 'Backend lint failed');
+  }
+}
+// Check 4: Tests
+function runTests() {
+  const testTimeout = 120000;
+  if (fs.existsSync('package.json')) {
+    const pkg = readPkg();
+    if (!pkg) { errors.push('Tests skipped: package.json is malformed JSON'); return; }
+    if (pkg.scripts?.test) { tryExec('npm test', 'Tests failed', testTimeout); return; }
+    tryExec('npx vitest run', 'Tests failed', testTimeout);
+    return;
+  }
+  if (fs.existsSync('requirements.txt') || fs.existsSync('pyproject.toml')) {
+    tryExec('pytest', 'Tests failed', testTimeout);
+    return;
+  }
+  if (fs.existsSync('frontend/package.json')) {
+    tryExec('cd frontend && npx vitest run', 'Frontend tests failed', testTimeout);
+  }
+  if (fs.existsSync('backend/requirements.txt')) {
+    tryExec('cd backend && pytest', 'Backend tests failed', testTimeout);
+  }
+}
+runLint();
+runTests();
+if (errors.length > 0) {
+  console.error('[pre-commit] BLOCKED - fix these issues first:\n');
+  errors.forEach(e => console.error(`  ${e}\n`));
+  console.error('Run /build-fix to auto-resolve lint and build errors.');
+  process.exit(2);
+}
+// === PHASE 2: Agent review gate ===
+// Check if changed files have been reviewed in this session.
+// We use a marker file that gets created when agents complete review.
+const reviewMarker = '.claude/.last-review';
+let needsReview = false;
+try {
+  if (!stagedFiles) {
+    // Nothing staged, allow
+    process.exit(0);
+  }
+  const stagedList = stagedFiles.split('\n').filter(f =>
+    f.endsWith('.js') || f.endsWith('.ts') || f.endsWith('.tsx') ||
+    f.endsWith('.py') || f.endsWith('.jsx') || f.endsWith('.mjs')
+  );
+  if (stagedList.length === 0) {
+    // No code files staged (just docs/config), skip review
+    process.exit(0);
+  }
+  if (fs.existsSync(reviewMarker)) {
+    try {
+      const review = JSON.parse(fs.readFileSync(reviewMarker, 'utf-8'));
+      const reviewedFiles = new Set(review.files || []);
+      const unreviewed = stagedList.filter(f => !reviewedFiles.has(f));
+      if (unreviewed.length > 0) {
+        needsReview = true;
+      }
+    } catch {
+      needsReview = true;
+    }
+  } else {
+    needsReview = true;
+  }
+} catch {
+  // Can't determine, allow
+  process.exit(0);
+}
+if (needsReview) {
+  console.error('[pre-commit] Code review required before commit.\n');
+  console.error('Run code-quality-reviewer and security-reviewer agents on the changed files,');
+  console.error('then retry the commit. Or run /code-review to do this automatically.\n');
+  console.error('To mark files as reviewed, the agents will update .claude/.last-review.');
+  process.exit(2);
+}

package/templates/claude-code/hooks/typescript.json CHANGED Viewed

@@ -9,6 +9,15 @@
             "command": "node .claude/hooks/guard-protected-files.mjs"
           }
         ]
+      },
+      {
+        "matcher": "Bash",
+        "hooks": [
+          {
+            "type": "command",
+            "command": "node .claude/hooks/pre-commit-gate.mjs"
+          }
+        ]
       }
     ],
     "PostToolUse": [
@@ -27,7 +36,7 @@
         "hooks": [
           {
             "type": "command",
-            "command": "npx tsc --noEmit 2>&1 && npx eslint . 2>&1"
+            "command": "node .claude/hooks/code-hygiene.mjs"
           }
         ]
       }

package/templates/claude-code/skills/ai-prompts/SKILL.md CHANGED Viewed

@@ -1,44 +1,122 @@
 ---
-name: AI Prompts
-description: AI/LLM integration patterns and best practices
+name: ai-prompts
+description: AI/LLM integration patterns, guardrails infrastructure, and compliance (EU AI Act, NIST AI RMF)
 ---
-# AI/LLM Integration
-## Structured Output
-- Always validate AI responses with Pydantic (Python) or Zod (TypeScript)
-- Never use raw string responses in application logic
-- Define response schemas before making API calls
-- Handle malformed responses gracefully
-## Prompt Engineering
-- Use system prompts for consistent behavior
-- Include examples (few-shot) for complex tasks
-- Be specific about output format
-- Test prompts with edge cases
-## Failover Patterns
-- Implement rule-based fallback when AI is unavailable
-- Set aggressive timeouts (30-60s for most calls)
-- Retry with exponential backoff (max 3 attempts)
-- Cache responses when appropriate
-- Monitor token usage and costs
-## Rate Limiting
-- Implement client-side rate limiting before API calls
-- Queue requests during high load
-- Use batch APIs when processing multiple items
-- Handle 429 responses gracefully
-## Security
-- Never include user secrets in prompts
-- Sanitize user input before including in prompts
-- Protect against prompt injection attacks (delimiter tokens, input validation, output filtering)
-- Validate and sanitize AI output before using
-- Don't trust AI output for security decisions
-## Testing
-- Use golden datasets for regression testing
-- Mock AI responses in unit tests
-- Test timeout and error handling paths
-- Monitor response quality over time
+# AI/LLM Integration & Guardrails
+This project includes AI guardrails infrastructure in `src/lib/ai/` (TypeScript) or `app/ai/` (Python).
+## Using the AI Client
+All AI calls MUST go through the guardrails client. Never call the Anthropic SDK directly.
+**TypeScript:**
+```typescript
+import { getAIClient } from '@/lib/ai';
+import { z } from 'zod';
+const ai = getAIClient();
+const result = await ai.generate({
+  prompt: 'Analyze this text',
+  schema: z.object({ sentiment: z.enum(['positive', 'negative', 'neutral']), confidence: z.number() }),
+  purpose: 'sentiment-analysis',  // Required for audit trail
+});
+if (result.needsHumanReview) {
+  // Route to approval queue — confidence below threshold
+}
+```
+**Python:**
+```python
+from app.ai import get_ai_client
+from pydantic import BaseModel
+class Sentiment(BaseModel):
+    sentiment: str
+    confidence: float
+ai = get_ai_client()
+result = await ai.generate(prompt="Analyze this text", schema=Sentiment, purpose="sentiment-analysis")
+if result.needs_human_review:
+    # Route to approval queue
+    pass
+```
+## Guardrails Architecture
+| Layer | What It Does | Compliance |
+|-------|-------------|------------|
+| **Input Guard** | Prompt injection detection, input sanitization, length limits | EU AI Act Art. 15, NIST Manage 2.2 |
+| **Output Validation** | Zod/Pydantic schema validation with retry on parse failure | NIST Manage 2.4 |
+| **Confidence Scoring** | Scores each response (0-1), routes low confidence to human review | NIST Measure 2.5, EU AI Act Art. 14 |
+| **Audit Logger** | Structured logging of every AI interaction (input preview, output, confidence, model, latency) | EU AI Act Art. 12, NIST Manage 1.3 |
+| **Health Metrics** | AI-specific health endpoint: availability, confidence distribution, error rates, per-model stats | NIST Manage 3.2, EU AI Act Art. 9 |
+| **AI Disclosure** | All AI responses carry `aiGenerated: true` flag | EU AI Act Art. 50 |
+## Rules
+- **Never call the Anthropic/OpenAI SDK directly.** Always use `getAIClient()` / `get_ai_client()`
+- **Never use raw string responses in application logic.** Always validate with Zod/Pydantic schemas
+- **Never skip the `purpose` parameter.** Every AI call must be tagged for audit traceability
+- **Never trust AI output for security decisions.** Always validate independently
+- **Never log full prompts.** The audit logger captures only a 200-char preview to avoid PII leakage
+- **Always handle `needsHumanReview`.** If confidence is below threshold, the response must be reviewed before acting on it
+## Confidence Thresholds
+| Confidence | Action | Use Case |
+|-----------|--------|----------|
+| > 0.9 | Auto-accept | Low-risk: summaries, formatting, classification |
+| 0.7 - 0.9 | Accept with logging | Medium-risk: recommendations, content generation |
+| 0.5 - 0.7 | Flag for review | High-risk: decisions, user-facing content |
+| < 0.5 | Require human approval | Critical: financial, medical, legal, safety |
+Configure the threshold per-call via `confidenceThreshold` parameter.
+## Prompt Injection Protection
+The input guard detects:
+- **Instruction override**: "ignore previous instructions", "disregard your rules"
+- **Role manipulation**: "you are now a...", "pretend to be..."
+- **System prompt extraction**: "show me your system prompt", "repeat your instructions"
+- **Delimiter injection**: `<system>`, `[INST]`, `<|im_start|>`
+- **Data exfiltration**: "send this data to..."
+Detected injections are blocked and logged. Suspicious patterns (encoded payloads, code execution) are logged but not blocked.
+## AI Health Endpoint
+Mount at `/api/ai/health`. Returns:
+```json
+{
+  "status": "ok | degraded | unhealthy",
+  "aiAvailable": true,
+  "metrics": {
+    "totalCalls": 142,
+    "successRate": 0.97,
+    "avgConfidence": 0.84,
+    "avgLatencyMs": 1230,
+    "lowConfidenceRate": 0.08,
+    "errorRate": 0.03
+  },
+  "models": {
+    "claude-sonnet-4-20250514": { "calls": 142, "successRate": 0.97, "avgLatencyMs": 1230 }
+  }
+}
+```
+## Recommended Libraries
+**TypeScript:** Zod (validation), @anthropic-ai/sdk (model calls), @instructor-ai/instructor (structured extraction)
+**Python:** Pydantic (validation), anthropic (model calls), instructor (structured extraction), guardrails-ai (advanced validation), presidio (PII detection)
+**Observability:** Langfuse (open-source LLM tracing), OpenTelemetry (spans), Helicone (proxy logging)
+## Testing AI Integrations
+- Mock AI responses in unit tests — never make real API calls in CI
+- Use golden datasets for regression testing prompt quality
+- Test timeout, retry, and error handling paths explicitly
+- Test with adversarial inputs (prompt injection patterns)
+- Monitor confidence score distribution for drift over time