thevoidforge 21.0.10 → 21.0.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (108) hide show
  1. package/dist/.claude/commands/ai.md +69 -0
  2. package/dist/.claude/commands/architect.md +121 -0
  3. package/dist/.claude/commands/assemble.md +201 -0
  4. package/dist/.claude/commands/assess.md +75 -0
  5. package/dist/.claude/commands/blueprint.md +135 -0
  6. package/dist/.claude/commands/build.md +116 -0
  7. package/dist/.claude/commands/campaign.md +201 -0
  8. package/dist/.claude/commands/cultivation.md +166 -0
  9. package/dist/.claude/commands/current.md +128 -0
  10. package/dist/.claude/commands/dangerroom.md +74 -0
  11. package/dist/.claude/commands/debrief.md +178 -0
  12. package/dist/.claude/commands/deploy.md +99 -0
  13. package/dist/.claude/commands/devops.md +143 -0
  14. package/dist/.claude/commands/gauntlet.md +140 -0
  15. package/dist/.claude/commands/git.md +104 -0
  16. package/dist/.claude/commands/grow.md +146 -0
  17. package/dist/.claude/commands/imagine.md +126 -0
  18. package/dist/.claude/commands/portfolio.md +50 -0
  19. package/dist/.claude/commands/prd.md +113 -0
  20. package/dist/.claude/commands/qa.md +107 -0
  21. package/dist/.claude/commands/review.md +151 -0
  22. package/dist/.claude/commands/security.md +100 -0
  23. package/dist/.claude/commands/test.md +96 -0
  24. package/dist/.claude/commands/thumper.md +116 -0
  25. package/dist/.claude/commands/treasury.md +100 -0
  26. package/dist/.claude/commands/ux.md +118 -0
  27. package/dist/.claude/commands/vault.md +189 -0
  28. package/dist/.claude/commands/void.md +108 -0
  29. package/dist/CHANGELOG.md +1918 -0
  30. package/dist/CLAUDE.md +250 -0
  31. package/dist/HOLOCRON.md +856 -0
  32. package/dist/VERSION.md +123 -0
  33. package/dist/docs/NAMING_REGISTRY.md +478 -0
  34. package/dist/docs/methods/AI_INTELLIGENCE.md +276 -0
  35. package/dist/docs/methods/ASSEMBLER.md +142 -0
  36. package/dist/docs/methods/BACKEND_ENGINEER.md +165 -0
  37. package/dist/docs/methods/BUILD_JOURNAL.md +185 -0
  38. package/dist/docs/methods/BUILD_PROTOCOL.md +426 -0
  39. package/dist/docs/methods/CAMPAIGN.md +568 -0
  40. package/dist/docs/methods/CONTEXT_MANAGEMENT.md +189 -0
  41. package/dist/docs/methods/DEEP_CURRENT.md +184 -0
  42. package/dist/docs/methods/DEVOPS_ENGINEER.md +295 -0
  43. package/dist/docs/methods/FIELD_MEDIC.md +261 -0
  44. package/dist/docs/methods/FORGE_ARTIST.md +108 -0
  45. package/dist/docs/methods/FORGE_KEEPER.md +268 -0
  46. package/dist/docs/methods/GAUNTLET.md +344 -0
  47. package/dist/docs/methods/GROWTH_STRATEGIST.md +466 -0
  48. package/dist/docs/methods/HEARTBEAT.md +168 -0
  49. package/dist/docs/methods/MCP_INTEGRATION.md +139 -0
  50. package/dist/docs/methods/MUSTER.md +148 -0
  51. package/dist/docs/methods/PRD_GENERATOR.md +186 -0
  52. package/dist/docs/methods/PRODUCT_DESIGN_FRONTEND.md +250 -0
  53. package/dist/docs/methods/QA_ENGINEER.md +337 -0
  54. package/dist/docs/methods/RELEASE_MANAGER.md +145 -0
  55. package/dist/docs/methods/SECURITY_AUDITOR.md +320 -0
  56. package/dist/docs/methods/SUB_AGENTS.md +335 -0
  57. package/dist/docs/methods/SYSTEMS_ARCHITECT.md +171 -0
  58. package/dist/docs/methods/TESTING.md +359 -0
  59. package/dist/docs/methods/THUMPER.md +175 -0
  60. package/dist/docs/methods/TIME_VAULT.md +120 -0
  61. package/dist/docs/methods/TREASURY.md +184 -0
  62. package/dist/docs/methods/TROUBLESHOOTING.md +265 -0
  63. package/dist/docs/patterns/README.md +52 -0
  64. package/dist/docs/patterns/ad-billing-adapter.ts +537 -0
  65. package/dist/docs/patterns/ad-platform-adapter.ts +421 -0
  66. package/dist/docs/patterns/ai-classifier.ts +195 -0
  67. package/dist/docs/patterns/ai-eval.ts +272 -0
  68. package/dist/docs/patterns/ai-orchestrator.ts +341 -0
  69. package/dist/docs/patterns/ai-router.ts +194 -0
  70. package/dist/docs/patterns/ai-tool-schema.ts +237 -0
  71. package/dist/docs/patterns/api-route.ts +241 -0
  72. package/dist/docs/patterns/backtest-engine.ts +499 -0
  73. package/dist/docs/patterns/browser-review.ts +292 -0
  74. package/dist/docs/patterns/combobox.tsx +300 -0
  75. package/dist/docs/patterns/component.tsx +262 -0
  76. package/dist/docs/patterns/daemon-process.ts +338 -0
  77. package/dist/docs/patterns/data-pipeline.ts +297 -0
  78. package/dist/docs/patterns/database-migration.ts +466 -0
  79. package/dist/docs/patterns/e2e-test.ts +629 -0
  80. package/dist/docs/patterns/error-handling.ts +312 -0
  81. package/dist/docs/patterns/execution-safety.ts +601 -0
  82. package/dist/docs/patterns/financial-transaction.ts +342 -0
  83. package/dist/docs/patterns/funding-plan.ts +462 -0
  84. package/dist/docs/patterns/game-entity.ts +137 -0
  85. package/dist/docs/patterns/game-loop.ts +113 -0
  86. package/dist/docs/patterns/game-state.ts +143 -0
  87. package/dist/docs/patterns/job-queue.ts +225 -0
  88. package/dist/docs/patterns/kongo-integration.ts +164 -0
  89. package/dist/docs/patterns/middleware.ts +363 -0
  90. package/dist/docs/patterns/mobile-screen.tsx +139 -0
  91. package/dist/docs/patterns/mobile-service.ts +167 -0
  92. package/dist/docs/patterns/multi-tenant.ts +382 -0
  93. package/dist/docs/patterns/oauth-token-lifecycle.ts +223 -0
  94. package/dist/docs/patterns/outbound-rate-limiter.ts +260 -0
  95. package/dist/docs/patterns/prompt-template.ts +195 -0
  96. package/dist/docs/patterns/revenue-source-adapter.ts +311 -0
  97. package/dist/docs/patterns/service.ts +224 -0
  98. package/dist/docs/patterns/sse-endpoint.ts +118 -0
  99. package/dist/docs/patterns/stablecoin-adapter.ts +511 -0
  100. package/dist/docs/patterns/third-party-script.ts +68 -0
  101. package/dist/scripts/thumper/gom-jabbar.sh +241 -0
  102. package/dist/scripts/thumper/relay.sh +610 -0
  103. package/dist/scripts/thumper/scan.sh +359 -0
  104. package/dist/scripts/thumper/thumper.sh +190 -0
  105. package/dist/scripts/thumper/water-rings.sh +76 -0
  106. package/dist/scripts/voidforge.js +1 -1
  107. package/package.json +1 -1
  108. package/dist/tsconfig.tsbuildinfo +0 -1
@@ -0,0 +1,272 @@
1
+ /**
2
+ * Pattern: AI Eval
3
+ *
4
+ * Key principles:
5
+ * - Every AI feature needs a golden dataset — input/expected-output pairs
6
+ * - Automated eval runs catch regressions before they reach production
7
+ * - Compare scores across prompt versions — never ship a prompt that scores lower
8
+ * - Scoring functions are pluggable — exact match, semantic similarity, custom
9
+ * - Eval results are stored, not printed — you need history to detect drift
10
+ *
11
+ * Agents: Batman (testing/validation), Picard (architecture), L (monitoring)
12
+ *
13
+ * Provider note: Eval runs use the same model call patterns from ai-orchestrator.ts.
14
+ * The eval framework itself is provider-agnostic.
15
+ */
16
+
17
+ // --- Core types ---
18
+
19
/** A single test case in a golden dataset: an input plus its expected output. */
export interface EvalCase<TInput = string, TExpected = string> {
  id: string // Stable ID so the same case can be tracked across runs
  input: TInput
  expected: TExpected
  tags?: string[] // Grouping labels, e.g. ['edge-case', 'billing', 'multi-language']
}

/** Result of evaluating a single case against the model. */
export interface CaseResult {
  caseId: string // Matches EvalCase.id
  passed: boolean // score >= the suite's pass threshold
  score: number // 0.0 - 1.0, produced by the suite's scoring function
  actual: string // What the model returned
  expected: string // What we wanted
  latencyMs: number // Wall-clock duration of the model call
  error?: string // Present only if the model call threw
}

/** Aggregate result of one eval run (one prompt version x one model). */
export interface EvalResult {
  runId: string // Unique per run
  promptVersion: string
  model: string
  timestamp: string // ISO-8601, set when the run finishes
  totalCases: number
  passedCases: number
  averageScore: number
  averageLatencyMs: number
  caseResults: CaseResult[] // Per-case detail, in suite order
  tags: Record<string, { count: number; avgScore: number }> // Per-tag breakdown
}

/** Comparison between two eval runs (base vs candidate prompt version). */
export interface VersionComparison {
  baseVersion: string
  candidateVersion: string
  baseScore: number // base.averageScore
  candidateScore: number // candidate.averageScore
  delta: number // Positive = improvement, negative = regression
  regressions: CaseResult[] // Candidate cases that scored lower than in base
  improvements: CaseResult[] // Candidate cases that scored higher than in base
  verdict: 'pass' | 'fail' | 'review' // Based on the regression threshold
}
63
+
64
+ // --- Scoring functions ---
65
+
66
+ /** Exact string match (case-insensitive). */
67
+ export function exactMatch(actual: string, expected: string): number {
68
+ return actual.trim().toLowerCase() === expected.trim().toLowerCase() ? 1.0 : 0.0
69
+ }
70
+
71
+ /** Check if expected value is contained in actual output. */
72
+ export function containsMatch(actual: string, expected: string): number {
73
+ return actual.toLowerCase().includes(expected.toLowerCase()) ? 1.0 : 0.0
74
+ }
75
+
76
+ /** JSON field match — compare specific fields in JSON outputs. */
77
+ export function jsonFieldMatch(
78
+ actual: string,
79
+ expected: string,
80
+ fields: string[]
81
+ ): number {
82
+ try {
83
+ const actualObj = JSON.parse(actual)
84
+ const expectedObj = JSON.parse(expected)
85
+ let matches = 0
86
+ for (const field of fields) {
87
+ if (actualObj[field] === expectedObj[field]) matches++
88
+ }
89
+ return matches / fields.length
90
+ } catch {
91
+ return 0.0 // Parse failure = score 0
92
+ }
93
+ }
94
+
95
+ // --- EvalSuite ---
96
+
97
/** Invokes the model under test with one input, resolving its raw text output. */
type ModelRunner = (input: string) => Promise<string>
/** Scores actual against expected output; returns a value in [0, 1]. */
type ScoringFunction = (actual: string, expected: string) => number
99
+
100
+ export class EvalSuite<TInput = string> {
101
+ private cases: EvalCase<TInput, string>[] = []
102
+ private scoreFn: ScoringFunction = exactMatch
103
+ private passThreshold = 0.8 // Case passes if score >= this
104
+
105
+ constructor(private name: string) {}
106
+
107
+ /** Add a test case to the suite. */
108
+ addCase(testCase: EvalCase<TInput, string>): this {
109
+ this.cases.push(testCase)
110
+ return this
111
+ }
112
+
113
+ /** Add multiple test cases. */
114
+ addCases(cases: EvalCase<TInput, string>[]): this {
115
+ this.cases.push(...cases)
116
+ return this
117
+ }
118
+
119
+ /** Set the scoring function (default: exactMatch). */
120
+ withScoring(fn: ScoringFunction): this {
121
+ this.scoreFn = fn
122
+ return this
123
+ }
124
+
125
+ /** Set the pass threshold (default: 0.8). */
126
+ withPassThreshold(threshold: number): this {
127
+ this.passThreshold = threshold
128
+ return this
129
+ }
130
+
131
+ /** Run the eval suite against a model runner function. */
132
+ async run(
133
+ runner: ModelRunner,
134
+ promptVersion: string,
135
+ model: string
136
+ ): Promise<EvalResult> {
137
+ const runId = `eval-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`
138
+ const caseResults: CaseResult[] = []
139
+
140
+ // Run cases sequentially to avoid rate limits. For large suites,
141
+ // batch with concurrency limit (e.g., p-limit with concurrency 5).
142
+ for (const testCase of this.cases) {
143
+ const start = Date.now()
144
+ let actual = ''
145
+ let error: string | undefined
146
+
147
+ try {
148
+ actual = await runner(testCase.input as string)
149
+ } catch (e) {
150
+ error = e instanceof Error ? e.message : 'Unknown error'
151
+ }
152
+
153
+ const score = error ? 0 : this.scoreFn(actual, testCase.expected)
154
+
155
+ caseResults.push({
156
+ caseId: testCase.id,
157
+ passed: score >= this.passThreshold,
158
+ score,
159
+ actual,
160
+ expected: testCase.expected,
161
+ latencyMs: Date.now() - start,
162
+ error,
163
+ })
164
+ }
165
+
166
+ // Compute per-tag breakdowns
167
+ const tags: Record<string, { count: number; avgScore: number }> = {}
168
+ for (const testCase of this.cases) {
169
+ for (const tag of testCase.tags ?? []) {
170
+ if (!tags[tag]) tags[tag] = { count: 0, avgScore: 0 }
171
+ const result = caseResults.find((r) => r.caseId === testCase.id)
172
+ if (result) {
173
+ tags[tag].count++
174
+ tags[tag].avgScore += result.score
175
+ }
176
+ }
177
+ }
178
+ for (const tag of Object.values(tags)) {
179
+ tag.avgScore = tag.avgScore / tag.count
180
+ }
181
+
182
+ return {
183
+ runId,
184
+ promptVersion,
185
+ model,
186
+ timestamp: new Date().toISOString(),
187
+ totalCases: this.cases.length,
188
+ passedCases: caseResults.filter((r) => r.passed).length,
189
+ averageScore: caseResults.reduce((sum, r) => sum + r.score, 0) / caseResults.length,
190
+ averageLatencyMs: caseResults.reduce((sum, r) => sum + r.latencyMs, 0) / caseResults.length,
191
+ caseResults,
192
+ tags,
193
+ }
194
+ }
195
+ }
196
+
197
+ // --- Version comparison ---
198
+
199
+ const REGRESSION_THRESHOLD = 0.02 // 2% drop triggers review
200
+
201
+ export function compareVersions(
202
+ base: EvalResult,
203
+ candidate: EvalResult
204
+ ): VersionComparison {
205
+ const delta = candidate.averageScore - base.averageScore
206
+
207
+ // Find regressions: cases that scored lower in the candidate
208
+ const regressions: CaseResult[] = []
209
+ const improvements: CaseResult[] = []
210
+
211
+ for (const candidateCase of candidate.caseResults) {
212
+ const baseCase = base.caseResults.find((b) => b.caseId === candidateCase.caseId)
213
+ if (!baseCase) continue
214
+
215
+ if (candidateCase.score < baseCase.score) regressions.push(candidateCase)
216
+ if (candidateCase.score > baseCase.score) improvements.push(candidateCase)
217
+ }
218
+
219
+ let verdict: VersionComparison['verdict'] = 'pass'
220
+ if (delta < -REGRESSION_THRESHOLD) verdict = 'fail'
221
+ else if (regressions.length > 0) verdict = 'review'
222
+
223
+ return {
224
+ baseVersion: base.promptVersion,
225
+ candidateVersion: candidate.promptVersion,
226
+ baseScore: base.averageScore,
227
+ candidateScore: candidate.averageScore,
228
+ delta,
229
+ regressions,
230
+ improvements,
231
+ verdict,
232
+ }
233
+ }
234
+
235
+ // --- Usage example ---
236
+
237
+ // const suite = new EvalSuite('ticket-classifier')
238
+ // .withScoring((actual, expected) => jsonFieldMatch(actual, expected, ['label']))
239
+ // .addCases([
240
+ // { id: 'billing-1', input: 'I was charged twice', expected: '{"label":"billing"}', tags: ['billing'] },
241
+ // { id: 'tech-1', input: 'App crashes on login', expected: '{"label":"technical"}', tags: ['technical'] },
242
+ // ])
243
+ //
244
+ // const baseResult = await suite.run(classifyV1, '2024.01.01', 'claude-sonnet-4-20250514')
245
+ // const candidateResult = await suite.run(classifyV2, '2024.01.15', 'claude-sonnet-4-20250514')
246
+ // const comparison = compareVersions(baseResult, candidateResult)
247
+ //
248
+ // if (comparison.verdict === 'fail') {
249
+ // console.error(`Regression detected: ${comparison.delta.toFixed(3)} score drop`)
250
+ // process.exit(1) // Fail CI
251
+ // }
252
+
253
+ /**
254
+ * Framework adaptations:
255
+ *
256
+ * Express:
257
+ * - Run evals in CI (GitHub Actions) on prompt file changes
258
+ * - Store EvalResult in S3/database for historical comparison
259
+ * - Endpoint to trigger eval: POST /api/admin/eval (admin-only)
260
+ *
261
+ * FastAPI:
262
+ * - Same EvalSuite shape in Python with pytest fixtures
263
+ * - Use pytest-benchmark for latency tracking
264
+ * - Store results in PostgreSQL with SQLAlchemy models
265
+ * - CI: run eval suite in GitHub Actions, compare with previous run
266
+ *
267
+ * Django:
268
+ * - Management command: python manage.py run_eval --suite ticket-classifier
269
+ * - EvalResult and CaseResult as Django models for admin dashboard
270
+ * - Compare versions in admin: side-by-side eval result view
271
+ * - Celery task for large eval suites (100+ cases)
272
+ */
@@ -0,0 +1,341 @@
1
+ /**
2
+ * Pattern: AI Orchestrator
3
+ *
4
+ * Three patterns for coordinating AI model calls:
5
+ * 1. Simple completion — single call with structured output
6
+ * 2. Agent loop — model calls tools in a loop until done
7
+ * 3. Circuit breaker — prevent cascading AI failures
8
+ *
9
+ * Key principles:
10
+ * - Always set MAX_ITERATIONS on agent loops — unbounded loops burn tokens
11
+ * - Retry with exponential backoff on transient failures (429, 5xx)
12
+ * - Circuit breaker protects downstream when a provider is degraded
13
+ * - Parse and validate model output with Zod — never trust raw JSON
14
+ * - Log every model call with latency, tokens, and model version
15
+ *
16
+ * Agents: Picard (architecture), Stark (backend), Kenobi (rate limits)
17
+ *
18
+ * Provider note: Primary examples use Anthropic SDK (@anthropic-ai/sdk).
19
+ * OpenAI adaptation is noted inline. The shapes are provider-agnostic.
20
+ */
21
+
22
+ import Anthropic from '@anthropic-ai/sdk'
23
+ import { z } from 'zod'
24
+
25
+ // --- 1. Simple Completion — single call → structured output ---
26
+
27
// Zod schema for the summarizer's structured output. Model responses are
// validated against this before being returned — raw model JSON is never trusted.
const SummarySchema = z.object({
  title: z.string(),
  bullets: z.array(z.string()).min(1).max(5), // 1-5 bullet points
  sentiment: z.enum(['positive', 'negative', 'neutral']),
})

// Static type derived from the schema so type and validator cannot drift apart.
type Summary = z.infer<typeof SummarySchema>
34
+
35
+ /** Single model call with retry and structured output parsing. */
36
+ export async function executeWithRetry<T>(
37
+ fn: () => Promise<T>,
38
+ maxRetries = 3,
39
+ baseDelayMs = 1000
40
+ ): Promise<T> {
41
+ for (let attempt = 0; attempt <= maxRetries; attempt++) {
42
+ try {
43
+ return await fn()
44
+ } catch (error: unknown) {
45
+ const isRetryable =
46
+ error instanceof Error &&
47
+ ('status' in error && [429, 500, 502, 503].includes((error as { status: number }).status))
48
+
49
+ if (!isRetryable || attempt === maxRetries) throw error
50
+
51
+ // Exponential backoff with jitter
52
+ const delay = baseDelayMs * 2 ** attempt + Math.random() * baseDelayMs
53
+ await new Promise((r) => setTimeout(r, delay))
54
+ }
55
+ }
56
+ throw new Error('Unreachable') // TypeScript needs this
57
+ }
58
+
59
+ export async function summarize(client: Anthropic, text: string): Promise<Summary> {
60
+ const response = await executeWithRetry(() =>
61
+ client.messages.create({
62
+ model: 'claude-sonnet-4-20250514',
63
+ max_tokens: 512,
64
+ messages: [{ role: 'user', content: `Summarize as JSON: ${text}` }],
65
+ // System prompt enforces output shape
66
+ system: 'Respond with JSON matching: { title, bullets[], sentiment }',
67
+ })
68
+ )
69
+
70
+ // Extract text content, parse with Zod — never trust raw model output
71
+ const content = response.content[0]
72
+ if (content.type !== 'text') throw new Error('Expected text response')
73
+ const parsed = JSON.parse(content.text)
74
+ return SummarySchema.parse(parsed) // Throws ZodError on invalid shape
75
+ }
76
+
77
+ // OpenAI adaptation:
78
+ // const response = await openai.chat.completions.create({
79
+ // model: 'gpt-4o', messages: [...],
80
+ // response_format: { type: 'json_object' }, // OpenAI JSON mode
81
+ // })
82
+ // const parsed = JSON.parse(response.choices[0].message.content)
83
+
84
+ // --- 2. Agent Loop — model calls tools until done ---
85
+
86
// Hard upper bound on agent-loop round-trips — never remove this; an
// unbounded tool loop can burn tokens indefinitely.
const MAX_ITERATIONS = 10

/** Tool made available to the model; mirrors the Anthropic tool-definition shape. */
interface ToolDefinition {
  name: string
  description: string
  input_schema: Record<string, unknown> // JSON Schema describing the tool's input
}

/** Outcome of a completed agent loop. */
interface AgentResult {
  finalResponse: string // Final text the model produced
  toolCallCount: number // Total tool invocations across all iterations
  iterations: number // Model round-trips used (<= MAX_ITERATIONS)
}
99
+
100
+ /** Agent loop: model decides which tools to call, iterates until done. */
101
+ export async function runAgentLoop(
102
+ client: Anthropic,
103
+ prompt: string,
104
+ tools: ToolDefinition[],
105
+ executeTool: (name: string, input: Record<string, unknown>) => Promise<string>
106
+ ): Promise<AgentResult> {
107
+ const messages: Anthropic.MessageParam[] = [{ role: 'user', content: prompt }]
108
+ let toolCallCount = 0
109
+
110
+ for (let i = 0; i < MAX_ITERATIONS; i++) {
111
+ const response = await executeWithRetry(() =>
112
+ client.messages.create({
113
+ model: 'claude-sonnet-4-20250514',
114
+ max_tokens: 4096,
115
+ messages,
116
+ tools: tools as Anthropic.Tool[],
117
+ })
118
+ )
119
+
120
+ // If model responds with text only (no tool calls), we're done
121
+ if (response.stop_reason === 'end_turn') {
122
+ const text = response.content.find((c) => c.type === 'text')
123
+ return { finalResponse: text?.text ?? '', toolCallCount, iterations: i + 1 }
124
+ }
125
+
126
+ // Process tool calls
127
+ const toolUseBlocks = response.content.filter((c) => c.type === 'tool_use')
128
+ const toolResults: Anthropic.ToolResultBlockParam[] = []
129
+
130
+ for (const block of toolUseBlocks) {
131
+ if (block.type !== 'tool_use') continue
132
+ toolCallCount++
133
+ try {
134
+ const result = await executeTool(block.name, block.input as Record<string, unknown>)
135
+ toolResults.push({ type: 'tool_result', tool_use_id: block.id, content: result })
136
+ } catch (error) {
137
+ // Send error back to model — it can recover or try a different approach
138
+ toolResults.push({
139
+ type: 'tool_result',
140
+ tool_use_id: block.id,
141
+ content: `Error: ${error instanceof Error ? error.message : 'Unknown error'}`,
142
+ is_error: true,
143
+ })
144
+ }
145
+ }
146
+
147
+ // Feed assistant response + tool results back for next iteration
148
+ messages.push({ role: 'assistant', content: response.content })
149
+ messages.push({ role: 'user', content: toolResults })
150
+ }
151
+
152
+ throw new Error(`Agent loop exceeded MAX_ITERATIONS (${MAX_ITERATIONS})`)
153
+ }
154
+
155
+ // --- 3. Circuit Breaker — prevent cascading AI failures ---
156
+
157
+ type CircuitState = 'closed' | 'open' | 'half-open'
158
+
159
+ /** Circuit breaker for AI provider calls. Opens after threshold failures. */
160
+ export class CircuitBreaker {
161
+ private state: CircuitState = 'closed'
162
+ private failureCount = 0
163
+ private lastFailureTime = 0
164
+
165
+ constructor(
166
+ private readonly failureThreshold: number = 5,
167
+ private readonly resetTimeoutMs: number = 60_000
168
+ ) {}
169
+
170
+ async execute<T>(fn: () => Promise<T>, fallback: () => Promise<T>): Promise<T> {
171
+ if (this.state === 'open') {
172
+ // Check if enough time has passed to try again
173
+ if (Date.now() - this.lastFailureTime > this.resetTimeoutMs) {
174
+ this.state = 'half-open'
175
+ } else {
176
+ return fallback() // Circuit open — use fallback immediately
177
+ }
178
+ }
179
+
180
+ try {
181
+ const result = await fn()
182
+ // Success — reset if we were testing
183
+ if (this.state === 'half-open') this.state = 'closed'
184
+ this.failureCount = 0
185
+ return result
186
+ } catch (error) {
187
+ this.failureCount++
188
+ this.lastFailureTime = Date.now()
189
+
190
+ if (this.failureCount >= this.failureThreshold) {
191
+ this.state = 'open'
192
+ }
193
+ return fallback()
194
+ }
195
+ }
196
+
197
+ getState(): CircuitState {
198
+ return this.state
199
+ }
200
+ }
201
+
202
+ // Usage:
203
+ // const breaker = new CircuitBreaker(5, 60_000)
204
+ // const result = await breaker.execute(
205
+ // () => summarize(anthropicClient, text),
206
+ // () => ruleBased.summarize(text) // Fallback: no AI, just rules
207
+ // )
208
+
209
+ /**
210
+ * Framework adaptations:
211
+ *
212
+ * Express:
213
+ * - Wrap agent loop in an Express route with req.setTimeout() for long-running calls
214
+ * - Circuit breaker as singleton middleware: app.use(aiCircuitBreaker)
215
+ * - Stream partial results via res.write() for SSE (see sse-endpoint.ts)
216
+ *
217
+ * FastAPI:
218
+ * - executeWithRetry → tenacity.retry(wait=wait_exponential(), stop=stop_after_attempt(3))
219
+ * - Agent loop: same shape, use httpx.AsyncClient for provider calls
220
+ * - Circuit breaker: pybreaker library or roll your own with same state machine
221
+ * - Background agent loops: FastAPI BackgroundTasks or Celery
222
+ *
223
+ * Django:
224
+ * - Services layer (services.py) holds orchestration logic — never in views
225
+ * - Circuit breaker state in Django cache (Redis) for multi-process
226
+ * - Agent loops in Celery tasks with soft_time_limit for MAX_ITERATIONS equivalent
227
+ * - Use django-ratelimit on the view to protect upstream AI spend
228
+ */
229
+
230
+ // --- 4. Multi-Tenant AI — per-org isolation, keys, cost tracking ---
231
+
232
+ /** Per-Tenant Circuit Breakers — scoped by provider+orgId, not just provider.
233
+ * One org's invalid API key must not trip the breaker for all orgs. */
234
+ const tenantBreakers = new Map<string, CircuitBreaker>()
235
+
236
+ function getTenantBreaker(provider: string, orgId: string): CircuitBreaker {
237
+ const key = `${provider}:${orgId}`
238
+ let breaker = tenantBreakers.get(key)
239
+ if (!breaker) {
240
+ breaker = new CircuitBreaker(5, 60_000)
241
+ tenantBreakers.set(key, breaker)
242
+ }
243
+ return breaker
244
+ }
245
+
246
+ // Usage:
247
+ // const breaker = getTenantBreaker('anthropic', org.id)
248
+ // await breaker.execute(() => summarize(client, text), fallback)
249
+
250
+ /** Shared Transport with Per-Tenant Keys — one connection pool, per-org auth.
251
+ * Avoids N connection pools for N orgs (~100 bytes overhead per org). */
252
+ const sharedTransport = new Anthropic() // Base client — shared pool
253
+
254
+ function getOrgClient(orgApiKey: string): Anthropic {
255
+ // Reuses the underlying transport; only overrides auth header
256
+ return new Anthropic({ apiKey: orgApiKey })
257
+ // OpenAI: new OpenAI({ apiKey: orgApiKey })
258
+ // Note: Anthropic SDK creates lightweight client instances.
259
+ // For providers supporting .withOptions(), prefer that to avoid any pool duplication.
260
+ }
261
+
262
+ /** API Key Fallback Chain — 3-tier resolution for provider credentials.
263
+ * (1) Org-specific from encrypted store → (2) Default org key → (3) Env var. */
264
+ interface CredentialStore {
265
+ get(orgId: string, provider: string): Promise<string | null>
266
+ getDefault(provider: string): Promise<string | null>
267
+ }
268
+
269
+ async function resolveApiKey(
270
+ orgId: string,
271
+ provider: string,
272
+ store: CredentialStore
273
+ ): Promise<string> {
274
+ // Tier 1: org-specific credential
275
+ const orgKey = await store.get(orgId, provider)
276
+ if (orgKey) return orgKey
277
+
278
+ // Tier 2: default org credential (shared across orgs without their own key)
279
+ const defaultKey = await store.getDefault(provider)
280
+ if (defaultKey) return defaultKey
281
+
282
+ // Tier 3: environment variable
283
+ const envMap: Record<string, string> = {
284
+ anthropic: 'ANTHROPIC_API_KEY',
285
+ openai: 'OPENAI_API_KEY',
286
+ }
287
+ const envKey = process.env[envMap[provider] ?? '']
288
+ if (envKey) return envKey
289
+
290
+ throw new Error(`No API key found for provider=${provider}, orgId=${orgId}`)
291
+ }
292
+
293
+ /** Credential Verification Probe — validate key before storing.
294
+ * Makes a lightweight API call (list models) to confirm the key works. */
295
+ async function verifyCredential(provider: string, apiKey: string): Promise<boolean> {
296
+ try {
297
+ if (provider === 'anthropic') {
298
+ const probe = new Anthropic({ apiKey })
299
+ // Minimal call — small max_tokens to burn near-zero quota
300
+ await probe.messages.create({
301
+ model: 'claude-sonnet-4-20250514',
302
+ max_tokens: 1,
303
+ messages: [{ role: 'user', content: 'ping' }],
304
+ })
305
+ }
306
+ // OpenAI: await new OpenAI({ apiKey }).models.list()
307
+ return true
308
+ } catch {
309
+ return false // Invalid key — do not store
310
+ }
311
+ }
312
+
313
+ /** Per-Tenant Cost Attribution — thread orgId through all usage recording. */
314
+ interface AiUsageRecord {
315
+ orgId: string
316
+ provider: string
317
+ model: string
318
+ inputTokens: number
319
+ outputTokens: number
320
+ costCents: number
321
+ timestamp: number
322
+ }
323
+
324
+ interface UsageSink {
325
+ record(entry: AiUsageRecord): void
326
+ }
327
+
328
+ function recordUsage(
329
+ sink: UsageSink,
330
+ orgId: string,
331
+ provider: string,
332
+ model: string,
333
+ inputTokens: number,
334
+ outputTokens: number,
335
+ costCents: number
336
+ ): void {
337
+ sink.record({ orgId, provider, model, inputTokens, outputTokens, costCents, timestamp: Date.now() })
338
+ }
339
+
340
+ // Usage:
341
+ // recordUsage(sink, org.id, 'anthropic', 'claude-sonnet-4-20250514', 320, 150, 2)