forgedev 1.2.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (171) hide show
  1. package/README.md +57 -10
  2. package/bin/chainproof.js +126 -0
  3. package/package.json +25 -7
  4. package/src/chainproof-bridge.js +330 -0
  5. package/src/ci-mode.js +85 -0
  6. package/src/claude-configurator.js +86 -49
  7. package/src/cli.js +30 -7
  8. package/src/composer.js +159 -34
  9. package/src/doctor-checks-chainproof.js +106 -0
  10. package/src/doctor-checks.js +39 -20
  11. package/src/doctor-prompts.js +9 -9
  12. package/src/doctor.js +37 -4
  13. package/src/guided.js +3 -3
  14. package/src/index.js +31 -10
  15. package/src/init-mode.js +64 -11
  16. package/src/menu.js +178 -0
  17. package/src/prompts.js +5 -12
  18. package/src/recommender.js +134 -10
  19. package/src/scanner.js +57 -2
  20. package/src/uat-generator.js +204 -189
  21. package/src/update-check.js +9 -4
  22. package/src/update.js +1 -1
  23. package/src/utils.js +64 -5
  24. package/templates/ai/guardrails-py/backend/app/ai/__init__.py +29 -0
  25. package/templates/ai/guardrails-py/backend/app/ai/audit_log.py +133 -0
  26. package/templates/ai/guardrails-py/backend/app/ai/client.py.template +323 -0
  27. package/templates/ai/guardrails-py/backend/app/ai/health.py.template +157 -0
  28. package/templates/ai/guardrails-py/backend/app/ai/input_guard.py +98 -0
  29. package/templates/ai/guardrails-ts/src/lib/ai/audit-log.ts.template +164 -0
  30. package/templates/ai/guardrails-ts/src/lib/ai/client.ts.template +403 -0
  31. package/templates/ai/guardrails-ts/src/lib/ai/health.ts.template +165 -0
  32. package/templates/ai/guardrails-ts/src/lib/ai/index.ts.template +17 -0
  33. package/templates/ai/guardrails-ts/src/lib/ai/input-guard.ts.template +124 -0
  34. package/templates/auth/nextauth/src/lib/auth.ts.template +12 -7
  35. package/templates/backend/express/Dockerfile.template +18 -0
  36. package/templates/backend/express/package.json.template +33 -0
  37. package/templates/backend/express/src/index.ts.template +34 -0
  38. package/templates/backend/express/src/routes/health.ts.template +27 -0
  39. package/templates/backend/express/tsconfig.json +17 -0
  40. package/templates/backend/fastapi/backend/Dockerfile.template +5 -0
  41. package/templates/backend/fastapi/backend/app/api/health.py.template +1 -1
  42. package/templates/backend/fastapi/backend/app/core/config.py.template +1 -1
  43. package/templates/backend/fastapi/backend/app/core/errors.py +1 -1
  44. package/templates/backend/fastapi/backend/app/main.py.template +3 -1
  45. package/templates/backend/fastapi/backend/requirements.txt.template +2 -0
  46. package/templates/backend/hono/Dockerfile.template +18 -0
  47. package/templates/backend/hono/package.json.template +31 -0
  48. package/templates/backend/hono/src/index.ts.template +32 -0
  49. package/templates/backend/hono/src/routes/health.ts.template +27 -0
  50. package/templates/backend/hono/tsconfig.json +18 -0
  51. package/templates/base/docs/uat/UAT_TEMPLATE.md.template +1 -1
  52. package/templates/chainproof/base/.chainproof/config.json.template +11 -0
  53. package/templates/chainproof/base/.chainproof/mcp-server.mjs +310 -0
  54. package/templates/chainproof/base/.mcp.json +9 -0
  55. package/templates/chainproof/fastapi/.chainproof/middleware.json.template +14 -0
  56. package/templates/chainproof/nextjs/.chainproof/hooks.json.template +19 -0
  57. package/templates/chainproof/polyglot/.chainproof/config.json.template +21 -0
  58. package/templates/claude-code/agents/architect.md +25 -11
  59. package/templates/claude-code/agents/build-error-resolver.md +19 -5
  60. package/templates/claude-code/agents/chief-of-staff.md +42 -8
  61. package/templates/claude-code/agents/code-quality-reviewer.md +14 -0
  62. package/templates/claude-code/agents/database-reviewer.md +15 -1
  63. package/templates/claude-code/agents/deep-reviewer.md +191 -0
  64. package/templates/claude-code/agents/doc-updater.md +19 -5
  65. package/templates/claude-code/agents/docs-lookup.md +19 -5
  66. package/templates/claude-code/agents/e2e-runner.md +26 -12
  67. package/templates/claude-code/agents/enforcement-gate.md +102 -0
  68. package/templates/claude-code/agents/frontend-builder.md +188 -0
  69. package/templates/claude-code/agents/harness-optimizer.md +36 -1
  70. package/templates/claude-code/agents/loop-operator.md +27 -13
  71. package/templates/claude-code/agents/planner.md +21 -7
  72. package/templates/claude-code/agents/product-strategist.md +24 -10
  73. package/templates/claude-code/agents/production-readiness.md +14 -0
  74. package/templates/claude-code/agents/prompt-auditor.md +115 -0
  75. package/templates/claude-code/agents/refactor-cleaner.md +22 -8
  76. package/templates/claude-code/agents/security-reviewer.md +14 -0
  77. package/templates/claude-code/agents/spec-validator.md +15 -1
  78. package/templates/claude-code/agents/tdd-guide.md +21 -7
  79. package/templates/claude-code/agents/uat-validator.md +14 -0
  80. package/templates/claude-code/claude-md/base.md +14 -7
  81. package/templates/claude-code/claude-md/fastapi.md +8 -8
  82. package/templates/claude-code/claude-md/fullstack.md +6 -6
  83. package/templates/claude-code/claude-md/hono.md +18 -0
  84. package/templates/claude-code/claude-md/nextjs.md +5 -5
  85. package/templates/claude-code/claude-md/remix.md +18 -0
  86. package/templates/claude-code/commands/audit-security.md +14 -0
  87. package/templates/claude-code/commands/audit-spec.md +14 -0
  88. package/templates/claude-code/commands/audit-wiring.md +14 -0
  89. package/templates/claude-code/commands/build-fix.md +28 -0
  90. package/templates/claude-code/commands/build-ui.md +59 -0
  91. package/templates/claude-code/commands/code-review.md +53 -31
  92. package/templates/claude-code/commands/fix-loop.md +211 -0
  93. package/templates/claude-code/commands/full-audit.md +36 -8
  94. package/templates/claude-code/commands/generate-prd.md +1 -1
  95. package/templates/claude-code/commands/generate-sdd.md +74 -0
  96. package/templates/claude-code/commands/generate-uat.md +107 -35
  97. package/templates/claude-code/commands/help.md +68 -0
  98. package/templates/claude-code/commands/live-uat.md +268 -0
  99. package/templates/claude-code/commands/optimize-claude-md.md +15 -1
  100. package/templates/claude-code/commands/plan.md +3 -3
  101. package/templates/claude-code/commands/pre-pr.md +57 -19
  102. package/templates/claude-code/commands/product-strategist.md +21 -0
  103. package/templates/claude-code/commands/resume-session.md +10 -10
  104. package/templates/claude-code/commands/run-uat.md +59 -2
  105. package/templates/claude-code/commands/save-session.md +10 -10
  106. package/templates/claude-code/commands/simplify.md +36 -0
  107. package/templates/claude-code/commands/tdd.md +17 -18
  108. package/templates/claude-code/commands/verify-all.md +24 -0
  109. package/templates/claude-code/commands/verify-intent.md +55 -0
  110. package/templates/claude-code/commands/workflows.md +52 -40
  111. package/templates/claude-code/hooks/polyglot.json +10 -1
  112. package/templates/claude-code/hooks/python.json +10 -1
  113. package/templates/claude-code/hooks/scripts/autofix-polyglot.mjs +2 -2
  114. package/templates/claude-code/hooks/scripts/autofix-python.mjs +1 -1
  115. package/templates/claude-code/hooks/scripts/autofix-typescript.mjs +1 -1
  116. package/templates/claude-code/hooks/scripts/code-hygiene.mjs +293 -0
  117. package/templates/claude-code/hooks/scripts/pre-commit-gate.mjs +207 -0
  118. package/templates/claude-code/hooks/typescript.json +10 -1
  119. package/templates/claude-code/skills/ai-prompts/SKILL.md +119 -41
  120. package/templates/claude-code/skills/git-workflow/SKILL.md +5 -5
  121. package/templates/claude-code/skills/nextjs/SKILL.md +1 -1
  122. package/templates/claude-code/skills/playwright/SKILL.md +5 -5
  123. package/templates/claude-code/skills/security-api/SKILL.md +1 -1
  124. package/templates/claude-code/skills/security-web/SKILL.md +1 -1
  125. package/templates/claude-code/skills/testing-patterns/SKILL.md +9 -9
  126. package/templates/database/prisma-postgres/{.env.example → .env.example.template} +1 -0
  127. package/templates/database/sqlalchemy-postgres/{.env.example → .env.example.template} +1 -0
  128. package/templates/docs-portal/fastapi/backend/app/portal/__pycache__/docs_reader.cpython-314.pyc +0 -0
  129. package/templates/docs-portal/fastapi/backend/app/portal/docs_reader.py +201 -0
  130. package/templates/docs-portal/fastapi/backend/app/portal/html_renderer.py +229 -0
  131. package/templates/docs-portal/fastapi/backend/app/portal/router.py.template +35 -0
  132. package/templates/docs-portal/nextjs/src/app/portal/[category]/[slug]/page.tsx +81 -0
  133. package/templates/docs-portal/nextjs/src/app/portal/[category]/page.tsx +65 -0
  134. package/templates/docs-portal/nextjs/src/app/portal/layout.tsx.template +54 -0
  135. package/templates/docs-portal/nextjs/src/app/portal/page.tsx +85 -0
  136. package/templates/docs-portal/nextjs/src/components/portal/markdown-renderer.tsx +101 -0
  137. package/templates/docs-portal/nextjs/src/components/portal/mobile-portal-nav.tsx +81 -0
  138. package/templates/docs-portal/nextjs/src/components/portal/portal-nav.tsx +86 -0
  139. package/templates/docs-portal/nextjs/src/lib/docs.ts +139 -0
  140. package/templates/frontend/nextjs/package.json.template +3 -1
  141. package/templates/frontend/react/index.html.template +12 -0
  142. package/templates/frontend/react/package.json.template +34 -0
  143. package/templates/frontend/react/src/App.tsx.template +10 -0
  144. package/templates/frontend/react/src/index.css +1 -0
  145. package/templates/frontend/react/src/main.tsx +10 -0
  146. package/templates/frontend/react/tsconfig.json +17 -0
  147. package/templates/frontend/react/vite.config.ts.template +15 -0
  148. package/templates/frontend/react/vitest.config.ts +9 -0
  149. package/templates/frontend/remix/app/root.tsx.template +31 -0
  150. package/templates/frontend/remix/app/routes/_index.tsx.template +19 -0
  151. package/templates/frontend/remix/app/routes/api.health.ts.template +10 -0
  152. package/templates/frontend/remix/app/tailwind.css +1 -0
  153. package/templates/frontend/remix/package.json.template +39 -0
  154. package/templates/frontend/remix/tsconfig.json +18 -0
  155. package/templates/frontend/remix/vite.config.ts.template +7 -0
  156. package/templates/infra/github-actions/.github/workflows/ci.yml.template +3 -0
  157. package/docs/00-README.md +0 -310
  158. package/docs/01-universal-prompt-library.md +0 -1049
  159. package/docs/02-claude-code-mastery-playbook.md +0 -283
  160. package/docs/03-multi-agent-verification.md +0 -565
  161. package/docs/04-errata-and-verification-checklist.md +0 -284
  162. package/docs/05-universal-scaffolder-vision.md +0 -452
  163. package/docs/06-confidence-assessment-and-repo-prompt.md +0 -407
  164. package/docs/errata.md +0 -58
  165. package/docs/multi-agent-verification.md +0 -66
  166. package/docs/playbook.md +0 -95
  167. package/docs/prompt-library.md +0 -160
  168. package/docs/uat/UAT_CHECKLIST.csv +0 -9
  169. package/docs/uat/UAT_TEMPLATE.md +0 -163
  170. package/templates/claude-code/commands/done.md +0 -19
  171. /package/{docs/plans/.gitkeep → templates/docs-portal/fastapi/backend/app/portal/__init__.py} +0 -0
@@ -0,0 +1,133 @@
1
+ """AI Audit Logger — Structured logging of all AI interactions.
2
+
3
+ Compliance: EU AI Act Art. 12 (logging and traceability),
4
+ NIST AI RMF Manage 1.3 (monitoring)
5
+ """
6
+
7
+ import json
8
+ import logging
9
+ import threading
10
+ import uuid
11
+ from collections import deque
12
+ from dataclasses import dataclass, field
13
+ from datetime import datetime, timedelta, timezone
14
+
15
+ logger = logging.getLogger("ai.audit")
16
+
17
+
18
+ @dataclass
19
+ class AuditEntry:
20
+ """A single AI interaction audit record."""
21
+
22
+ id: str = field(default_factory=lambda: str(uuid.uuid4()))
23
+ timestamp: str = field(default_factory=lambda: datetime.now(timezone.utc).isoformat())
24
+ model: str = ""
25
+ purpose: str = "unspecified"
26
+ input_preview: str = ""
27
+ confidence: float = 0.0
28
+ needs_human_review: bool = False
29
+ latency_ms: float = 0.0
30
+ token_usage: dict[str, int] | None = None
31
+ success: bool = True
32
+ error: str | None = None
33
+ human_action: str | None = None # approved | rejected | modified
34
+ human_reviewer_id: str | None = None
35
+
36
+
37
+ class AIAuditLog:
38
+ """In-memory audit log with structured logging output."""
39
+
40
+ def __init__(self, max_entries: int = 1000):
41
+ self._entries: deque[AuditEntry] = deque(maxlen=max_entries)
42
+ self._lock = threading.Lock()
43
+
44
+ def log(self, entry: AuditEntry) -> None:
45
+ with self._lock:
46
+ self._entries.append(entry)
47
+
48
+ log_data = {
49
+ "type": "ai_interaction",
50
+ "id": entry.id,
51
+ "timestamp": entry.timestamp,
52
+ "model": entry.model,
53
+ "purpose": entry.purpose,
54
+ "confidence": entry.confidence,
55
+ "needs_human_review": entry.needs_human_review,
56
+ "latency_ms": round(entry.latency_ms, 1),
57
+ "success": entry.success,
58
+ }
59
+
60
+ if entry.error:
61
+ log_data["error"] = entry.error
62
+ logger.warning("[AI_AUDIT] %s", json.dumps(log_data))
63
+ elif entry.confidence < 0.5:
64
+ logger.warning("[AI_AUDIT] %s", json.dumps(log_data))
65
+ else:
66
+ logger.info("[AI_AUDIT] %s", json.dumps(log_data))
67
+
68
+ VALID_ACTIONS = {"approved", "rejected", "modified"}
69
+
70
+ def record_human_review(
71
+ self, audit_id: str, action: str, reviewer_id: str | None = None
72
+ ) -> None:
73
+ if action not in self.VALID_ACTIONS:
74
+ raise ValueError(f"Invalid action: {action}. Must be one of {self.VALID_ACTIONS}")
75
+
76
+ with self._lock:
77
+ for entry in self._entries:
78
+ if entry.id == audit_id:
79
+ entry.human_action = action
80
+ entry.human_reviewer_id = reviewer_id
81
+ break
82
+ else:
83
+ logger.warning("[AI_AUDIT] audit_id not found: %s", audit_id)
84
+ return
85
+
86
+ # Log the review event without adding a duplicate to the buffer
87
+ log_data = {
88
+ "type": "ai_interaction_review",
89
+ "id": entry.id,
90
+ "purpose": f"human-review:{entry.purpose}",
91
+ "action": action,
92
+ "reviewer_id": reviewer_id,
93
+ }
94
+ logger.info("[AI_AUDIT] %s", json.dumps(log_data))
95
+
96
+ def get_recent_entries(self, count: int = 50) -> list[AuditEntry]:
97
+ with self._lock:
98
+ return list(self._entries)[-count:]
99
+
100
+ def get_stats(self, window_seconds: int = 3600) -> dict:
101
+ cutoff = datetime.now(timezone.utc) - timedelta(seconds=window_seconds)
102
+ with self._lock:
103
+ recent = [
104
+ e for e in self._entries
105
+ if datetime.fromisoformat(e.timestamp) >= cutoff
106
+ ]
107
+
108
+ if not recent:
109
+ return {
110
+ "total_calls": 0,
111
+ "success_rate": 1.0,
112
+ "avg_confidence": 0.0,
113
+ "avg_latency_ms": 0.0,
114
+ "human_review_rate": 0.0,
115
+ "error_rate": 0.0,
116
+ }
117
+
118
+ successes = [e for e in recent if e.success]
119
+ reviews = [e for e in recent if e.needs_human_review]
120
+
121
+ return {
122
+ "total_calls": len(recent),
123
+ "success_rate": len(successes) / len(recent),
124
+ "avg_confidence": sum(e.confidence for e in recent) / len(recent),
125
+ "avg_latency_ms": sum(e.latency_ms for e in recent) / len(recent),
126
+ "human_review_rate": len(reviews) / len(recent),
127
+ "error_rate": 1 - (len(successes) / len(recent)),
128
+ }
129
+
130
+
131
+ # --- Singleton ---
132
+
133
+ ai_audit_log = AIAuditLog()
@@ -0,0 +1,323 @@
1
+ """AI Client — Central wrapper for all LLM interactions.
2
+
3
+ Every AI call goes through this client, which provides:
4
+ - Input validation and prompt injection detection
5
+ - Output validation against Pydantic models
6
+ - Confidence scoring with human review routing
7
+ - Structured audit logging (EU AI Act Art. 12)
8
+ - Health metrics collection (NIST AI RMF Manage 3.2)
9
+
10
+ Compliance: EU AI Act (2024/1689), NIST AI RMF 1.0
11
+ """
12
+
13
+ import os
14
+ import time
15
+ import uuid
16
+ from dataclasses import dataclass, field
17
+ from typing import Any, Callable, TypeVar
18
+
19
+ import anthropic
20
+ from pydantic import BaseModel, ValidationError
21
+
22
+ from app.ai.audit_log import ai_audit_log, AuditEntry
23
+ from app.ai.input_guard import validate_input, InputValidationResult
24
+ from app.ai.health import ai_health_metrics
25
+
26
+ T = TypeVar("T", bound=BaseModel)
27
+
28
+
29
+ @dataclass
30
+ class AIClientConfig:
31
+ """Configuration for the AI client."""
32
+
33
+ api_key: str = field(default_factory=lambda: os.environ.get("ANTHROPIC_API_KEY", ""))
34
+ model: str = "claude-sonnet-4-20250514"
35
+ confidence_threshold: float = 0.7
36
+ max_input_length: int = 100_000
37
+ detect_injection: bool = True
38
+ audit_log: bool = True
39
+ moderator: Callable[[str], bool] | None = None
40
+
41
+ def __post_init__(self):
42
+ if not self.api_key:
43
+ raise ValueError("ANTHROPIC_API_KEY environment variable is required")
44
+
45
+
46
+ @dataclass
47
+ class AIResponse:
48
+ """Response from an AI call with metadata."""
49
+
50
+ data: Any
51
+ confidence: float
52
+ needs_human_review: bool
53
+ model: str
54
+ latency_ms: float
55
+ token_usage: dict[str, int] | None = None
56
+ ai_generated: bool = True
57
+ audit_id: str = field(default_factory=lambda: str(uuid.uuid4()))
58
+ error: str | None = None
59
+ blocked: bool = False
60
+
61
+
62
class AIClient:
    """Central AI client with guardrails, validation, and audit logging.

    ``generate`` validates the input, calls the model (retrying on failure),
    validates the output against a Pydantic schema, scores confidence, and
    records the outcome in the audit log and health metrics.
    """

    def __init__(self, config: AIClientConfig | None = None):
        """Create the client.

        Args:
            config: Client settings; a default ``AIClientConfig`` is built
                when omitted.
        """
        self.config = config or AIClientConfig()
        self.client = anthropic.AsyncAnthropic(api_key=self.config.api_key)

    async def generate(
        self,
        prompt: str,
        schema: type[T],
        system_prompt: str | None = None,
        context: str | None = None,
        model: str | None = None,
        confidence_threshold: float | None = None,
        max_retries: int = 2,
        purpose: str = "unspecified",
    ) -> AIResponse:
        """Generate a structured response validated against a Pydantic model.

        Args:
            prompt: The user prompt.
            schema: Pydantic model class to validate output against.
            system_prompt: Optional system prompt override.
            context: Additional context appended to the prompt.
            model: Override model for this call.
            confidence_threshold: Override threshold for human review.
            max_retries: Max retries on validation failure.
            purpose: Business purpose tag for audit log.

        Returns:
            AIResponse with validated data, confidence score, and audit trail.
            Blocked inputs and exhausted retries are reported through the
            response's ``blocked`` / ``error`` fields rather than raised.
        """
        start_time = time.monotonic()
        model = model or self.config.model
        threshold = (
            confidence_threshold
            if confidence_threshold is not None
            else self.config.confidence_threshold
        )

        # Step 1: Input validation
        input_validation = self._validate_inputs(prompt, context)
        if input_validation.blocked:
            return self._build_blocked_response(input_validation, start_time, model, purpose)

        # Step 2: Call model with retries
        last_error: Exception | None = None
        for _attempt in range(max_retries + 1):
            try:
                response = await self._call_model(prompt, system_prompt, context, model)

                # Step 3: Parse and validate output
                try:
                    parsed = schema.model_validate_json(response["content"])
                except (ValidationError, ValueError) as e:
                    # Malformed output: try again while attempts remain.
                    last_error = e
                    continue

                # Step 4: Score confidence
                confidence = self._score_confidence(response)
                needs_review = confidence < threshold

                # Step 5: Build response
                result = AIResponse(
                    data=parsed,
                    confidence=confidence,
                    needs_human_review=needs_review,
                    model=model,
                    latency_ms=(time.monotonic() - start_time) * 1000,
                    token_usage=response.get("usage"),
                    ai_generated=True,
                )

                # Step 6: Audit log
                if self.config.audit_log:
                    self._log_interaction(result, prompt, purpose)

                # Step 7: Health metrics
                ai_health_metrics.record_call(
                    model=model,
                    latency_ms=result.latency_ms,
                    confidence=confidence,
                    success=True,
                    token_usage=response.get("usage"),
                )

                return result

            except Exception as e:
                # Any other failure in this attempt is retried the same way.
                last_error = e

        # All retries exhausted
        latency = (time.monotonic() - start_time) * 1000
        ai_health_metrics.record_call(model=model, latency_ms=latency, confidence=0, success=False)

        failure = AIResponse(
            data=None,
            confidence=0,
            needs_human_review=True,
            model=model,
            latency_ms=latency,
            error=str(last_error) if last_error else "AI call failed after retries",
        )

        # Failed interactions belong in the audit trail too; previously only
        # successful and blocked calls were recorded there.
        if self.config.audit_log:
            self._log_interaction(failure, prompt, purpose)

        return failure

    def _validate_inputs(self, prompt: str, context: str | None) -> InputValidationResult:
        """Run length, injection, and moderation checks on prompt plus context.

        Returns a blocked result from the first check that fails, otherwise
        an unblocked result.
        """
        full_input = f"{prompt}\n{context}" if context else prompt

        if len(full_input) > self.config.max_input_length:
            return InputValidationResult(
                blocked=True,
                reason=f"Input exceeds maximum length ({self.config.max_input_length} chars)",
            )

        if self.config.detect_injection:
            result = validate_input(full_input)
            if result.blocked:
                return result

        if self.config.moderator and self.config.moderator(full_input):
            return InputValidationResult(blocked=True, reason="Content blocked by moderation policy")

        return InputValidationResult(blocked=False)

    async def _call_model(
        self,
        prompt: str,
        system_prompt: str | None,
        context: str | None,
        model: str,
    ) -> dict:
        """Send one request to the model.

        Returns:
            A dict with ``content`` (JSON text extracted from the first text
            block of the reply), ``usage`` (input/output token counts) and
            ``stop_reason``.
        """
        user_content = f"{prompt}\n\nContext:\n{context}" if context else prompt

        response = await self.client.messages.create(
            model=model,
            max_tokens=4096,
            system=system_prompt or (
                f"You are an AI assistant for {{PROJECT_NAME}}. "
                "Respond with valid JSON matching the requested schema. Be precise and factual."
            ),
            messages=[{"role": "user", "content": user_content}],
        )

        # Only the first text block is used; an empty string if there is none.
        text = next((block.text for block in response.content if block.type == "text"), "")

        # Extract JSON from response
        content = self._extract_json(text)

        return {
            "content": content,
            "usage": {
                "input_tokens": response.usage.input_tokens,
                "output_tokens": response.usage.output_tokens,
            },
            "stop_reason": response.stop_reason,
        }

    def _extract_json(self, text: str) -> str:
        """Extract JSON from model response, handling markdown code blocks."""
        import re

        text = text.strip()

        # Already looks like bare JSON
        if text.startswith(("{", "[")):
            return text

        # Extract from code blocks
        match = re.search(r"```(?:json)?\s*\n?([\s\S]*?)\n?```", text)
        if match:
            return match.group(1).strip()

        return text

    def _score_confidence(self, response: dict) -> float:
        """Heuristically score a response in the range 0.0-1.0.

        Starts at 0.85 and subtracts 0.3 when the reply stopped on
        ``max_tokens``, 0.2 when it has fewer than 10 output tokens, and 0.1
        when it has more than 3000.
        """
        score = 0.85

        if response.get("stop_reason") == "max_tokens":
            score -= 0.3

        # ``or {}`` also covers a response whose "usage" is present but None.
        usage = response.get("usage") or {}
        output_tokens = usage.get("output_tokens", 0)

        if output_tokens < 10:
            score -= 0.2
        if output_tokens > 3000:
            score -= 0.1

        return max(0.0, min(1.0, score))

    def _log_interaction(self, result: AIResponse, prompt: str, purpose: str) -> None:
        """Write ``result`` to the audit log with a 100-character prompt preview."""
        entry = AuditEntry(
            id=result.audit_id,
            model=result.model,
            purpose=purpose,
            input_preview=prompt[:100] + ("..." if len(prompt) > 100 else ""),
            confidence=result.confidence,
            needs_human_review=result.needs_human_review,
            latency_ms=result.latency_ms,
            token_usage=result.token_usage,
            success=result.error is None,
            error=result.error,
        )
        ai_audit_log.log(entry)

    def _build_blocked_response(
        self,
        validation: InputValidationResult,
        start_time: float,
        model: str,
        purpose: str,
    ) -> AIResponse:
        """Build (and, if enabled, audit) the response for a rejected input.

        The audit entry stores ``"[BLOCKED]"`` instead of a prompt preview.
        """
        result = AIResponse(
            data=None,
            confidence=0,
            needs_human_review=False,
            model=model,
            latency_ms=(time.monotonic() - start_time) * 1000,
            ai_generated=False,
            error=f"Input blocked: {validation.reason}",
            blocked=True,
        )

        if self.config.audit_log:
            ai_audit_log.log(AuditEntry(
                id=result.audit_id,
                model=model,
                purpose=purpose,
                input_preview="[BLOCKED]",
                confidence=0,
                needs_human_review=False,
                latency_ms=result.latency_ms,
                success=False,
                error=validation.reason,
            ))

        return result
303
+
304
+
305
+ # --- Singleton ---
306
+
307
+ _default_client: AIClient | None = None
308
+
309
+
310
def get_ai_client(config: AIClientConfig | None = None) -> AIClient:
    """Return the shared AI client, creating it on the first call.

    ``config`` is honoured only by the call that creates the client; passing
    one afterwards has no effect beyond a ``UserWarning``.
    """
    global _default_client

    if _default_client is None:
        _default_client = AIClient(config)
        return _default_client

    if config is not None:
        import warnings

        warnings.warn(
            "AIClient already initialized; ignoring provided config. "
            "Call get_ai_client() without arguments or use AIClient(config) directly.",
            UserWarning,
            stacklevel=2,
        )
    return _default_client
@@ -0,0 +1,157 @@
1
+ """AI Health Check — Observability endpoint for AI system metrics.
2
+
3
+ Compliance: NIST AI RMF Manage 3.2 (monitoring),
4
+ EU AI Act Art. 9 (risk management)
5
+
6
+ Mount as: router.include_router(ai_health_router, prefix="/api/ai")
7
+ """
8
+
9
+ import time
10
+ from collections import deque
11
+ from dataclasses import dataclass
12
+ from typing import Any
13
+
14
+ from fastapi import APIRouter
15
+
16
+ ai_health_router = APIRouter(tags=["ai-health"])
17
+
18
+
19
+ @dataclass
20
+ class CallMetric:
21
+ timestamp: float
22
+ model: str
23
+ latency_ms: float
24
+ confidence: float
25
+ success: bool
26
+ token_usage: dict[str, int] | None = None
27
+
28
+
29
+ class AIHealthMetrics:
30
+ def __init__(self, max_metrics: int = 5000):
31
+ self._metrics: deque[CallMetric] = deque(maxlen=max_metrics)
32
+
33
+ def record_call(
34
+ self,
35
+ model: str,
36
+ latency_ms: float,
37
+ confidence: float,
38
+ success: bool,
39
+ token_usage: dict[str, int] | None = None,
40
+ ) -> None:
41
+ self._metrics.append(CallMetric(
42
+ timestamp=time.time(),
43
+ model=model,
44
+ latency_ms=latency_ms,
45
+ confidence=confidence,
46
+ success=success,
47
+ token_usage=token_usage,
48
+ ))
49
+
50
+ def get_health_status(self, window_seconds: int = 3600) -> dict[str, Any]:
51
+ cutoff = time.time() - window_seconds
52
+ recent = [m for m in self._metrics if m.timestamp >= cutoff]
53
+
54
+ if not recent:
55
+ return {
56
+ "status": "ok",
57
+ "ai_available": True,
58
+ "message": "No AI calls in the monitoring window",
59
+ "window": f"{window_seconds // 60}m",
60
+ "metrics": self._empty_metrics(),
61
+ "models": {},
62
+ }
63
+
64
+ successes = [m for m in recent if m.success]
65
+ error_rate = 1 - (len(successes) / len(recent))
66
+ avg_confidence = (
67
+ sum(m.confidence for m in successes) / len(successes) if successes else 0
68
+ )
69
+ avg_latency = sum(m.latency_ms for m in recent) / len(recent)
70
+ low_confidence = [m for m in successes if m.confidence < 0.7]
71
+ low_confidence_rate = len(low_confidence) / max(len(successes), 1)
72
+
73
+ # Determine status
74
+ status = "ok"
75
+ warnings: list[str] = []
76
+
77
+ if error_rate > 0.5:
78
+ status = "unhealthy"
79
+ warnings.append(f"High error rate: {error_rate * 100:.1f}%")
80
+ elif error_rate > 0.1:
81
+ status = "degraded"
82
+ warnings.append(f"Elevated error rate: {error_rate * 100:.1f}%")
83
+
84
+ if avg_confidence < 0.5:
85
+ status = "degraded" if status == "ok" else status
86
+ warnings.append(f"Low average confidence: {avg_confidence * 100:.1f}%")
87
+
88
+ if avg_latency > 10_000:
89
+ status = "degraded" if status == "ok" else status
90
+ warnings.append(f"High average latency: {avg_latency:.0f}ms")
91
+
92
+ # Per-model breakdown
93
+ models: dict[str, dict] = {}
94
+ model_names = set(m.model for m in recent)
95
+ for model_name in model_names:
96
+ model_calls = [m for m in recent if m.model == model_name]
97
+ model_successes = [m for m in model_calls if m.success]
98
+ total_tokens = sum(
99
+ (m.token_usage.get("input_tokens", 0) + m.token_usage.get("output_tokens", 0))
100
+ for m in model_calls if m.token_usage
101
+ )
102
+ models[model_name] = {
103
+ "calls": len(model_calls),
104
+ "success_rate": len(model_successes) / len(model_calls),
105
+ "avg_latency_ms": sum(m.latency_ms for m in model_calls) / len(model_calls),
106
+ "avg_confidence": (
107
+ sum(m.confidence for m in model_successes) / len(model_successes)
108
+ if model_successes else 0
109
+ ),
110
+ "total_tokens": total_tokens,
111
+ }
112
+
113
+ return {
114
+ "status": status,
115
+ "ai_available": error_rate < 1,
116
+ "message": "; ".join(warnings) if warnings else "All AI systems operating normally",
117
+ "window": f"{window_seconds // 60}m",
118
+ "metrics": {
119
+ "total_calls": len(recent),
120
+ "success_rate": 1 - error_rate,
121
+ "avg_confidence": avg_confidence,
122
+ "avg_latency_ms": avg_latency,
123
+ "low_confidence_rate": low_confidence_rate,
124
+ "error_rate": error_rate,
125
+ },
126
+ "models": models,
127
+ }
128
+
129
+ def _empty_metrics(self) -> dict:
130
+ return {
131
+ "total_calls": 0,
132
+ "success_rate": 1.0,
133
+ "avg_confidence": 0.0,
134
+ "avg_latency_ms": 0.0,
135
+ "low_confidence_rate": 0.0,
136
+ "error_rate": 0.0,
137
+ }
138
+
139
+
140
+ # --- Singleton ---
141
+
142
+ ai_health_metrics = AIHealthMetrics()
143
+
144
+
145
+ # --- FastAPI Health Endpoint ---
146
+
147
@ai_health_router.get("/health")
async def ai_health():
    """Report the current health of the AI subsystem.

    Responds with the summary produced by the shared metrics collector for
    its default monitoring window: overall status, availability, aggregate
    rates, and the per-model breakdown.
    """
    health_report = ai_health_metrics.get_health_status()
    return health_report
155
+
156
+
157
+ AIHealthStatus = dict # Type alias for documentation