ai-inference-stepper 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (160) hide show
  1. package/.env.example +169 -0
  2. package/.eslintrc.cjs +23 -0
  3. package/.github/workflows/ci.yml +51 -0
  4. package/.github/workflows/keep-alive.yml +22 -0
  5. package/.github/workflows/publish.yml +34 -0
  6. package/ARCHITECTURE.md +594 -0
  7. package/Dockerfile +16 -0
  8. package/LICENSE +28 -0
  9. package/README.md +261 -0
  10. package/dist/alerts/discord.d.ts +19 -0
  11. package/dist/alerts/discord.d.ts.map +1 -0
  12. package/dist/alerts/discord.js +70 -0
  13. package/dist/alerts/discord.js.map +1 -0
  14. package/dist/cache/redisCache.d.ts +45 -0
  15. package/dist/cache/redisCache.d.ts.map +1 -0
  16. package/dist/cache/redisCache.js +171 -0
  17. package/dist/cache/redisCache.js.map +1 -0
  18. package/dist/cli.d.ts +3 -0
  19. package/dist/cli.d.ts.map +1 -0
  20. package/dist/cli.js +8 -0
  21. package/dist/cli.js.map +1 -0
  22. package/dist/config.d.ts +6 -0
  23. package/dist/config.d.ts.map +1 -0
  24. package/dist/config.js +251 -0
  25. package/dist/config.js.map +1 -0
  26. package/dist/fallback/templateFallback.d.ts +7 -0
  27. package/dist/fallback/templateFallback.d.ts.map +1 -0
  28. package/dist/fallback/templateFallback.js +29 -0
  29. package/dist/fallback/templateFallback.js.map +1 -0
  30. package/dist/index.d.ts +121 -0
  31. package/dist/index.d.ts.map +1 -0
  32. package/dist/index.js +198 -0
  33. package/dist/index.js.map +1 -0
  34. package/dist/logging.d.ts +10 -0
  35. package/dist/logging.d.ts.map +1 -0
  36. package/dist/logging.js +44 -0
  37. package/dist/logging.js.map +1 -0
  38. package/dist/metrics/metrics.d.ts +22 -0
  39. package/dist/metrics/metrics.d.ts.map +1 -0
  40. package/dist/metrics/metrics.js +78 -0
  41. package/dist/metrics/metrics.js.map +1 -0
  42. package/dist/providers/factory.d.ts +11 -0
  43. package/dist/providers/factory.d.ts.map +1 -0
  44. package/dist/providers/factory.js +52 -0
  45. package/dist/providers/factory.js.map +1 -0
  46. package/dist/providers/hfSpace.adapter.d.ts +21 -0
  47. package/dist/providers/hfSpace.adapter.d.ts.map +1 -0
  48. package/dist/providers/hfSpace.adapter.js +110 -0
  49. package/dist/providers/hfSpace.adapter.js.map +1 -0
  50. package/dist/providers/httpTemplate.adapter.d.ts +42 -0
  51. package/dist/providers/httpTemplate.adapter.d.ts.map +1 -0
  52. package/dist/providers/httpTemplate.adapter.js +98 -0
  53. package/dist/providers/httpTemplate.adapter.js.map +1 -0
  54. package/dist/providers/promptBuilder.d.ts +34 -0
  55. package/dist/providers/promptBuilder.d.ts.map +1 -0
  56. package/dist/providers/promptBuilder.js +315 -0
  57. package/dist/providers/promptBuilder.js.map +1 -0
  58. package/dist/providers/provider.interface.d.ts +45 -0
  59. package/dist/providers/provider.interface.d.ts.map +1 -0
  60. package/dist/providers/provider.interface.js +47 -0
  61. package/dist/providers/provider.interface.js.map +1 -0
  62. package/dist/providers/specs.d.ts +18 -0
  63. package/dist/providers/specs.d.ts.map +1 -0
  64. package/dist/providers/specs.js +326 -0
  65. package/dist/providers/specs.js.map +1 -0
  66. package/dist/providers/unified.adapter.d.ts +37 -0
  67. package/dist/providers/unified.adapter.d.ts.map +1 -0
  68. package/dist/providers/unified.adapter.js +141 -0
  69. package/dist/providers/unified.adapter.js.map +1 -0
  70. package/dist/queue/producer.d.ts +30 -0
  71. package/dist/queue/producer.d.ts.map +1 -0
  72. package/dist/queue/producer.js +87 -0
  73. package/dist/queue/producer.js.map +1 -0
  74. package/dist/queue/worker.d.ts +9 -0
  75. package/dist/queue/worker.d.ts.map +1 -0
  76. package/dist/queue/worker.js +137 -0
  77. package/dist/queue/worker.js.map +1 -0
  78. package/dist/server/app.d.ts +4 -0
  79. package/dist/server/app.d.ts.map +1 -0
  80. package/dist/server/app.js +394 -0
  81. package/dist/server/app.js.map +1 -0
  82. package/dist/server/start.d.ts +16 -0
  83. package/dist/server/start.d.ts.map +1 -0
  84. package/dist/server/start.js +45 -0
  85. package/dist/server/start.js.map +1 -0
  86. package/dist/stepper/orchestrator.d.ts +22 -0
  87. package/dist/stepper/orchestrator.d.ts.map +1 -0
  88. package/dist/stepper/orchestrator.js +333 -0
  89. package/dist/stepper/orchestrator.js.map +1 -0
  90. package/dist/types.d.ts +216 -0
  91. package/dist/types.d.ts.map +1 -0
  92. package/dist/types.js +14 -0
  93. package/dist/types.js.map +1 -0
  94. package/dist/utils/redaction.d.ts +9 -0
  95. package/dist/utils/redaction.d.ts.map +1 -0
  96. package/dist/utils/redaction.js +41 -0
  97. package/dist/utils/redaction.js.map +1 -0
  98. package/dist/utils/safeRequest.d.ts +38 -0
  99. package/dist/utils/safeRequest.d.ts.map +1 -0
  100. package/dist/utils/safeRequest.js +104 -0
  101. package/dist/utils/safeRequest.js.map +1 -0
  102. package/dist/validation/report.schema.d.ts +48 -0
  103. package/dist/validation/report.schema.d.ts.map +1 -0
  104. package/dist/validation/report.schema.js +72 -0
  105. package/dist/validation/report.schema.js.map +1 -0
  106. package/dist/webhooks/delivery.d.ts +31 -0
  107. package/dist/webhooks/delivery.d.ts.map +1 -0
  108. package/dist/webhooks/delivery.js +102 -0
  109. package/dist/webhooks/delivery.js.map +1 -0
  110. package/docs/assets/architecture.png +0 -0
  111. package/package.json +75 -0
  112. package/render.yaml +25 -0
  113. package/src/alerts/README.md +25 -0
  114. package/src/alerts/discord.ts +86 -0
  115. package/src/cache/How redis caching works in package stepper.md +971 -0
  116. package/src/cache/README.md +51 -0
  117. package/src/cache/redisCache.ts +194 -0
  118. package/src/ci/deploy.sh +36 -0
  119. package/src/cli.ts +9 -0
  120. package/src/config.ts +265 -0
  121. package/src/fallback/templateFallback.ts +32 -0
  122. package/src/index.ts +246 -0
  123. package/src/logging.ts +46 -0
  124. package/src/metrics/README.md +24 -0
  125. package/src/metrics/metrics.ts +84 -0
  126. package/src/providers/How the providers interact.md +121 -0
  127. package/src/providers/README.md +121 -0
  128. package/src/providers/factory.ts +57 -0
  129. package/src/providers/hfSpace.adapter.ts +119 -0
  130. package/src/providers/httpTemplate.adapter.ts +138 -0
  131. package/src/providers/promptBuilder.ts +330 -0
  132. package/src/providers/provider.interface.ts +73 -0
  133. package/src/providers/specs.ts +366 -0
  134. package/src/providers/unified.adapter.ts +172 -0
  135. package/src/queue/How queue works in package stepper.md +149 -0
  136. package/src/queue/README.md +41 -0
  137. package/src/queue/producer.ts +108 -0
  138. package/src/queue/worker.ts +170 -0
  139. package/src/server/app.ts +451 -0
  140. package/src/server/start.ts +68 -0
  141. package/src/stepper/Dockerfile +48 -0
  142. package/src/stepper/How orchestrator works in package stepper.md +746 -0
  143. package/src/stepper/README.md +43 -0
  144. package/src/stepper/orchestrator.ts +437 -0
  145. package/src/types.ts +238 -0
  146. package/src/utils/redaction.ts +50 -0
  147. package/src/utils/safeRequest.ts +140 -0
  148. package/src/validation/README.md +25 -0
  149. package/src/validation/report.schema.ts +96 -0
  150. package/src/webhooks/delivery.ts +162 -0
  151. package/tests/integration/full-flow.test.ts +192 -0
  152. package/tests/unit/alerts/discord.test.ts +119 -0
  153. package/tests/unit/cache.test.ts +87 -0
  154. package/tests/unit/orchestrator-fallback.test.ts +92 -0
  155. package/tests/unit/orchestrator.test.ts +105 -0
  156. package/tests/unit/providers/factory.test.ts +161 -0
  157. package/tests/unit/providers/unified.adapter.test.ts +206 -0
  158. package/tests/unit/utils/redaction.test.ts +140 -0
  159. package/tests/unit/utils/safeRequest.test.ts +164 -0
  160. package/tsconfig.json +26 -0
@@ -0,0 +1,43 @@
1
+ # 🎼 Inference Orchestrator
2
+
3
+ The Orchestrator is the "Head Chef" of the package. It coordinates the various AI providers, handles retries, and ensures that the user always receives a report, even if multiple providers fail.
4
+
5
+ ## 🎯 Purpose
6
+
7
+ - **Reliability**: Implements a rotation of providers. If one fails, it tries the next.
8
+ - **Resilience**: Uses **Circuit Breakers** and **Exponential Backoff**.
9
+ - **Efficiency**: Respects rate limits via **Bottleneck** (e.g., max 5 requests per minute).
10
+
11
+ ## 🛡️ Resilience Strategies
12
+
13
+ ### 1. Circuit Breaker
14
+
15
+ If more than 50% of a provider's recent requests fail, its "circuit flips open." The Orchestrator then stops sending requests to that provider for 5 minutes to give it time to recover.
16
+
17
+ ### 2. Smart Retries
18
+
19
+ When a provider fails with a temporary error (like a network blip), the Orchestrator waits before trying again:
20
+
21
+ - **Base Delay**: 40 seconds.
22
+ - **Exponential Backoff**: Each retry waits longer than the last.
23
+ - **Jitter**: Adds randomness to avoid "thundering herd" problems.
24
+
25
+ ### 3. Rate Limiting
26
+
27
+ Controls the flow of requests.
28
+
29
+ - **Requests Per Minute (RPM)**: Default is 5.
30
+ - **Concurrency**: Default is 2 simultaneous requests.
31
+
32
+ ### 4. Fail-safe Fallback
33
+
34
+ If _all_ AI providers are down or time out (after 1 minute), the Orchestrator generates a generic, high-quality template report based on the commit message. This ensures the user is never left with an empty result.
35
+
36
+ ## 📋 Core Functions
37
+
38
+ | Function | Description |
39
+ | ----------------------- | ----------------------------------------------------------------------- |
40
+ | `generateReportNow()` | The high-level entry point that manages the entire multi-provider flow. |
41
+ | `initializeProviders()` | Sets up the rate limiters and circuit breakers for each service. |
42
+ | `callWithRetries()` | Handles the low-level retry logic for a single provider. |
43
+ | `getProviderHealth()`   | Returns each provider's name, whether its circuit is open, and a `healthy` flag. |
@@ -0,0 +1,437 @@
1
+
2
+ // packages/stepper/src/stepper/orchestrator.ts
3
+
4
+ import Bottleneck from 'bottleneck';
5
+ import CircuitBreaker from 'opossum';
6
+ import { ProviderAdapter, ProviderError, AuthError, RateLimitError } from '../providers/provider.interface.js';
7
+ import { createProviderAdapter } from '../providers/factory.js';
8
+ import { PromptInput, ReportOutput, ProviderResult, ProviderAttemptMeta, StepperCallbacks, ProviderConfig, WebhookCallback } from '../types.js';
9
+ import { config } from '../config.js';
10
+ import { logger, createChildLogger } from '../logging.js';
11
+ import { generateTemplateFallback } from '../fallback/templateFallback.js';
12
+ import { recordProviderAttempt, recordProviderSuccess, recordProviderFailure } from '../metrics/metrics.js';
13
+ import { isRetryableError } from '../utils/safeRequest.js';
14
+ import { alertProviderFailure, alertCircuitOpen } from '../alerts/discord.js';
15
+
16
/**
 * Runtime bundle the orchestrator keeps for each enabled provider: the
 * adapter that performs the call together with the rate limiter, circuit
 * breaker, source configuration and failure counter maintained around it.
 */
interface ProviderWithLimiter {
  /** Configuration entry this bundle was built from. */
  config: ProviderConfig;
  /** Adapter produced by the provider factory. */
  adapter: ProviderAdapter;
  /** Bottleneck instance used to schedule calls to this provider. */
  limiter: Bottleneck;
  /** Circuit breaker wrapping `adapter.call`. */
  circuit: CircuitBreaker;
  /** Failed orchestration runs since the last success (reset to 0 on success). */
  consecutiveErrors: number;
}
23
+
24
// Enabled providers in the order they are tried; populated by initializeProviders().
let providers: Array<ProviderWithLimiter> = [];
// Lifecycle hooks accumulated through registerCallbacks().
let callbacks: StepperCallbacks = {};
26
+
27
/**
 * Build the runtime wrapper (adapter, rate limiter, circuit breaker) for
 * every enabled provider and install the result as the active provider list.
 *
 * Providers keep the order of `providerConfigs`, which is the order in which
 * `generateReportNow` tries them. The module-level list is only replaced once
 * every adapter has been created, so a failure leaves the previous list intact.
 *
 * @param providerConfigs - Provider definitions; defaults to `config.providers`.
 * @throws Error if the factory returns no adapter for an enabled provider.
 */
export function initializeProviders(providerConfigs: ProviderConfig[] = config.providers): void {
  const built: ProviderWithLimiter[] = [];

  for (const pc of providerConfigs) {
    if (!pc.enabled) {
      continue;
    }

    const adapter = createProviderAdapter(pc);
    if (!adapter) {
      throw new Error(`Failed to create adapter for provider ${pc.name}`);
    }

    // Minimum spacing between calls, in ms. A per-second limit wins when set;
    // otherwise the per-minute limit applies, falling back to 5 RPM
    // (60000 / 5 = 12000 ms between requests).
    let minTime: number;
    if (pc.rateLimitRPS) {
      minTime = 1000 / pc.rateLimitRPS;
    } else {
      minTime = 60000 / (pc.rateLimitRPM || 5);
    }

    const limiter = new Bottleneck({
      maxConcurrent: pc.concurrency,
      minTime: Math.ceil(minTime),
    });

    // Breaker settings: per-call timeout from the provider (15 s default),
    // window/cooldown/volume values from the shared circuit configuration.
    const breakerOptions = {
      timeout: pc.timeout || 15000,
      errorThresholdPercentage: 50,
      resetTimeout: config.circuit.cooldownSeconds * 1000,
      volumeThreshold: config.circuit.failureThreshold,
      rollingCountTimeout: config.circuit.windowSeconds * 1000,
    };
    const circuit = new CircuitBreaker(async (input: PromptInput) => adapter.call(input), breakerOptions);

    circuit.on('open', () => {
      logger.warn({ provider: pc.name }, 'Circuit breaker opened');
      // Fire-and-forget: alert delivery must not block or fail the breaker event.
      void alertCircuitOpen(pc.name);
    });
    circuit.on('halfOpen', () => {
      logger.info({ provider: pc.name }, 'Circuit breaker half-open, trying probe');
    });
    circuit.on('close', () => {
      logger.info({ provider: pc.name }, 'Circuit breaker closed');
    });

    built.push({ adapter, limiter, circuit, config: pc, consecutiveErrors: 0 });
  }

  providers = built;

  logger.info({ providerCount: providers.length, names: providers.map((p) => p.config.name) }, 'Providers initialized');
}
82
+
83
/**
 * Merge lifecycle hooks into the module-level callback registry.
 *
 * Hooks registered earlier are kept unless `cbs` supplies the same key, in
 * which case the newer hook replaces the older one.
 *
 * @param cbs - Hooks to add or override.
 */
export function registerCallbacks(cbs: StepperCallbacks): void {
  const merged: StepperCallbacks = { ...callbacks, ...cbs };
  callbacks = merged;
}
89
+
90
/**
 * Compute the wait before the next retry: the configured base delay doubled
 * for every previous attempt, plus a random whole-millisecond jitter below
 * `config.retry.maxJitterMs`.
 *
 * @param attempt - Zero-based index of the attempt that just failed.
 * @returns Delay in milliseconds.
 */
function getBackoffDelay(attempt: number): number {
  const { baseDelayMs, maxJitterMs } = config.retry;
  const exponential = baseDelayMs * 2 ** attempt;
  const jitter = Math.floor(Math.random() * maxJitterMs);
  return exponential + jitter;
}
98
+
99
/**
 * Promise-based delay.
 *
 * @param ms - Time to wait in milliseconds.
 * @returns A promise that resolves (with no value) once the timer fires.
 */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
105
+
106
/**
 * Outcome of delivering a payload to one webhook callback URL.
 */
interface CallbackResult {
  /** URL the payload was sent to. */
  url: string;
  /** True when the endpoint answered with an OK status. */
  success: boolean;
  /** HTTP status of the last response, when one was received. */
  statusCode?: number;
  /** Error text when delivery failed without an HTTP response. */
  error?: string;
}
115
+
116
/**
 * POST a JSON payload to a single webhook callback, retrying on failure.
 *
 * Stepper stays agnostic about the receiver: the payload is serialized as-is.
 * Responses with status >= 500 or 429, and thrown errors (network failure,
 * 10 s timeout), are retried with exponential backoff
 * (`backoffMs * 2^(attempt - 1)`) until `maxAttempts` is reached. Any other
 * non-OK status fails immediately.
 *
 * This function never rejects; every failure is reported in the result.
 *
 * @param callback - Target URL, optional extra headers and retry policy
 *   (defaults: 3 attempts, 1000 ms base backoff). Caller headers are spread
 *   last, so they override the defaults set here.
 * @param payload - Value sent as the JSON request body.
 * @returns The delivery outcome for this URL.
 */
async function sendCallback(
  callback: WebhookCallback,
  payload: unknown
): Promise<CallbackResult> {
  const maxAttempts = callback.retry?.maxAttempts ?? 3;
  const backoffMs = callback.retry?.backoffMs ?? 1000;

  for (let attempt = 1; attempt <= maxAttempts; attempt++) {
    const canRetry = attempt < maxAttempts;
    const delay = backoffMs * Math.pow(2, attempt - 1);

    try {
      const response = await fetch(callback.url, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
          'User-Agent': 'Stepper/1.0',
          'X-Stepper-Timestamp': Date.now().toString(),
          ...callback.headers,
        },
        body: JSON.stringify(payload),
        signal: AbortSignal.timeout(10000),
      });

      // The body is never read. Cancel it so the underlying connection is
      // released right away instead of lingering until garbage collection.
      await response.body?.cancel().catch(() => undefined);

      if (response.ok) {
        logger.info({ url: callback.url, attempt }, 'Callback succeeded');
        return { url: callback.url, success: true, statusCode: response.status };
      }

      // Retry on server errors or rate limits
      const transient = response.status >= 500 || response.status === 429;
      if (transient && canRetry) {
        logger.warn({ url: callback.url, status: response.status, delay }, 'Retrying callback');
        await sleep(delay);
        continue;
      }

      logger.error({ url: callback.url, status: response.status }, 'Callback failed');
      return { url: callback.url, success: false, statusCode: response.status };
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);

      if (canRetry) {
        logger.warn({ url: callback.url, error: message, delay }, 'Callback error, retrying');
        await sleep(delay);
        continue;
      }

      // Final attempt failed without a response: log it like the HTTP-failure
      // path does, so the failure is not silent.
      logger.error({ url: callback.url, error: message }, 'Callback failed');
      return { url: callback.url, success: false, error: message };
    }
  }

  // Only reachable when maxAttempts is less than 1.
  return { url: callback.url, success: false, error: 'Max attempts exceeded' };
}
173
+
174
/**
 * Deliver the raw result payload to each configured webhook, one at a time
 * and in order. Stepper does not interpret the payload; receivers decide
 * what to do with it.
 *
 * Delivery stops at the first failed callback unless that callback sets
 * `continueOnFailure`.
 *
 * @param callbacks - Webhooks to call, in order.
 * @param payload - Result (or error) plus job metadata sent to every webhook.
 * @returns One result per callback that was attempted.
 */
async function executeCallbacks(
  callbacks: WebhookCallback[],
  payload: {
    success: boolean;
    result?: ReportOutput;
    error?: string;
    metadata: {
      jobId: string;
      userId: string;
      commitSha: string;
      repo: string;
      provider?: string;
      generationTimeMs?: number;
      timestamp: string;
    };
  }
): Promise<CallbackResult[]> {
  const outcomes: CallbackResult[] = [];

  for (let index = 0; index < callbacks.length; index += 1) {
    const hook = callbacks[index];
    const outcome = await sendCallback(hook, payload);
    outcomes.push(outcome);

    const mustStop = !outcome.success && !hook.continueOnFailure;
    if (mustStop) {
      logger.warn({ url: hook.url }, 'Callback failed, stopping chain');
      break;
    }
  }

  return outcomes;
}
209
+
210
/**
 * Call one provider through its circuit breaker, retrying up to
 * `config.retry.maxAttemptsPerProvider` times.
 *
 * Failure handling per attempt:
 * - `AuthError`: rethrown immediately, never retried.
 * - `RateLimitError`: waits the provider's `retryAfter` seconds (or
 *   `config.retry.rateLimitFallbackSeconds`) and retries; rethrown on the
 *   last attempt.
 * - Errors accepted by `isRetryableError`: retried after `getBackoffDelay`;
 *   rethrown on the last attempt.
 * - Anything else: rethrown.
 *
 * @param provider - Provider bundle whose circuit is fired.
 * @param input - Prompt input handed to the circuit.
 * @param jobId - Job identifier, used for log context only.
 * @returns The provider's report and the duration of the successful call.
 * @throws The last provider error, or `Error('Max retries exceeded')` when
 *   the configured attempt count allows no attempt at all.
 */
async function callWithRetries(
  provider: ProviderWithLimiter,
  input: PromptInput,
  jobId: string
): Promise<{ result: ReportOutput; durationMs: number }> {
  const attemptLimit = config.retry.maxAttemptsPerProvider;
  const log = createChildLogger({ provider: provider.config.name, jobId });

  let attempt = 0;
  while (attempt < attemptLimit) {
    const startedAt = Date.now();
    const isLastAttempt = attempt >= attemptLimit - 1;

    try {
      // Opossum types the result as unknown; the circuit wraps adapter.call(input),
      // so the resolved value is the provider's ReportOutput.
      const result = (await provider.circuit.fire(input)) as ReportOutput;
      const durationMs = Date.now() - startedAt;

      log.debug({ attempt, durationMs }, 'Provider call succeeded');
      return { result, durationMs };
    } catch (error) {
      const durationMs = Date.now() - startedAt;
      const message = error instanceof Error ? error.message : String(error);
      log.warn({ attempt, error: message, durationMs }, 'Provider call failed');

      if (error instanceof AuthError) {
        // Bad credentials will not fix themselves; give up on this provider.
        log.error({ error: error.message }, 'Auth error - stopping retries');
        throw error;
      }

      if (error instanceof RateLimitError) {
        // Prefer the wait requested by the service, else the configured fallback.
        const retryAfter = error.retryAfter || config.retry.rateLimitFallbackSeconds;
        log.info({ retryAfterSeconds: retryAfter, attempt }, 'Rate limited, backing off');

        if (isLastAttempt) {
          throw error;
        }
        await sleep(retryAfter * 1000);
      } else if (isRetryableError(error) && !isLastAttempt) {
        const delay = getBackoffDelay(attempt);
        log.debug({ delay, attempt }, 'Retrying after backoff');
        await sleep(delay);
      } else {
        throw error;
      }
    }

    attempt += 1;
  }

  throw new Error('Max retries exceeded');
}
269
+
270
/**
 * Run a registered lifecycle hook without letting it break the caller.
 *
 * Does nothing when no hook is registered under `name`. Errors thrown or
 * rejected by the hook are logged and swallowed.
 *
 * @param name - Key of the hook in the callback registry.
 * @param args - Arguments forwarded to the hook.
 */
async function invokeCallback<T extends keyof StepperCallbacks>(
  name: T,
  ...args: Parameters<NonNullable<StepperCallbacks[T]>>
): Promise<void> {
  const handler = callbacks[name];
  if (handler) {
    const run = handler as (...params: unknown[]) => void | Promise<void>;
    try {
      await run(...args);
    } catch (error) {
      logger.error({ callback: name, error }, 'Callback threw error');
    }
  }
}
286
+
287
/**
 * Generate a report by trying each initialized provider in order.
 *
 * Flow:
 * 1. Fires the `onStart` hook and lazily initializes providers if none exist.
 * 2. For each provider: skips it when its circuit is open; otherwise
 *    schedules `callWithRetries` through the provider's rate limiter.
 * 3. On the first success: records metrics, fires `onSuccess`, starts
 *    webhook delivery in the background (not awaited) and returns.
 * 4. On failure: records the error, alerts, and moves to the next provider.
 * 5. When every provider failed or was skipped: returns the template
 *    fallback (firing `onFallback`) if enabled, otherwise throws.
 *
 * @param input - Commit data to summarize, plus optional webhook callbacks.
 * @param jobId - Identifier used in logs, hooks and webhook metadata.
 * @returns The report, the provider that produced it (or `'fallback'`),
 *   the per-provider attempt log and timings.
 * @throws Error when all providers fail and the fallback is disabled, or
 *   when lazy provider initialization fails.
 */
export async function generateReportNow(input: PromptInput, jobId: string = 'immediate'): Promise<ProviderResult> {
  const log = createChildLogger({ jobId, userId: input.userId, commitSha: input.commitSha });
  const startTime = Date.now();
  const providersAttempted: ProviderAttemptMeta[] = [];

  await invokeCallback('onStart', jobId, input);

  // Ensure providers are initialized
  if (providers.length === 0) {
    initializeProviders();
  }

  // Try each provider in order
  for (const provider of providers) {
    const providerName = provider.config.name;

    // An open circuit means the provider is cooling down; record the skip
    // (attempt 0) and move on without calling it.
    if (provider.circuit.opened) {
      log.info({ provider: providerName }, 'Skipping provider - circuit open');
      providersAttempted.push({
        provider: providerName,
        attemptNumber: 0,
        skipped: 'circuit_open',
      });
      continue;
    }

    // Low-level retries happen inside callWithRetries, so at this level each
    // provider is attempted exactly once per job.
    const attemptNumber = 1;

    try {
      log.info({ provider: providerName, attempt: attemptNumber }, 'Attempting provider');

      await invokeCallback('onProviderAttempt', jobId, providerName, attemptNumber, {
        provider: providerName,
        attemptNumber,
      });

      recordProviderAttempt(providerName);

      // Schedule with rate limiter and call with retries
      const { result, durationMs } = await provider.limiter.schedule(() =>
        callWithRetries(provider, input, jobId)
      );

      // Success!
      provider.consecutiveErrors = 0;
      const totalMs = Date.now() - startTime;
      log.info({ provider: providerName, totalMs, providerMs: durationMs }, 'Report generated successfully');

      recordProviderSuccess(providerName, durationMs);
      providersAttempted.push({
        provider: providerName,
        attemptNumber,
        durationMs,
      });

      await invokeCallback('onSuccess', jobId, providerName, result, {
        timings: { totalMs, providerMs: durationMs },
      });

      // Deliver webhooks right after success so the result goes out even if
      // later work by the caller (e.g. DB writes) fails.
      if (input.callbacks && input.callbacks.length > 0) {
        const callbackPayload = {
          success: true,
          result,
          metadata: {
            jobId,
            userId: input.userId,
            commitSha: input.commitSha,
            repo: input.repo,
            provider: providerName,
            generationTimeMs: totalMs,
            timestamp: new Date().toISOString(),
          },
        };

        // Intentionally not awaited: delivery runs in the background and its
        // outcome is only logged.
        void executeCallbacks(input.callbacks, callbackPayload)
          .then((callbackResults: CallbackResult[]) => {
            log.info({ callbackResults: callbackResults.map(r => ({ url: r.url, success: r.success })) }, 'Callbacks executed');
          })
          .catch((err: unknown) => {
            log.error({ error: err instanceof Error ? err.message : String(err) }, 'Callbacks execution error');
          });
      }

      return {
        result,
        usedProvider: providerName,
        providersAttempted,
        fallback: false,
        timings: { totalMs, providerMs: durationMs },
      };
    } catch (error) {
      const errorMessage = error instanceof Error ? error.message : String(error);
      const errorCode = error instanceof ProviderError ? error.type : 'UNKNOWN';

      log.warn({ provider: providerName, error: errorMessage, errorCode }, 'Provider failed');

      // Update consecutive errors
      provider.consecutiveErrors = (provider.consecutiveErrors || 0) + 1;

      recordProviderFailure(providerName, errorCode);
      providersAttempted.push({
        provider: providerName,
        attemptNumber,
        error: errorMessage,
        errorCode,
      });

      // Alert on failure (fire-and-forget)
      void alertProviderFailure(providerName, provider.consecutiveErrors, error);

      // Continue to next provider
      continue;
    }
  }

  // All providers failed or were skipped
  const totalMs = Date.now() - startTime;

  if (!config.fallback.enabled) {
    log.error({ totalMs, providersAttempted: providersAttempted.length }, 'All providers failed, job will be retried');
    throw new Error(`All ${providersAttempted.length} provider(s) failed. Job will retry.`);
  }

  // With the fallback enabled the job completes with a template report, so
  // the log must not claim that it will be retried.
  log.error({ totalMs, providersAttempted: providersAttempted.length }, 'All providers failed, returning template fallback');

  const fallbackResult = generateTemplateFallback(input);
  await invokeCallback('onFallback', jobId, fallbackResult, { providersAttempted });
  return {
    result: fallbackResult,
    usedProvider: 'fallback',
    providersAttempted,
    fallback: true,
    timings: { totalMs },
  };
}
427
+
428
/**
 * Snapshot the circuit state of every initialized provider.
 *
 * @returns One entry per provider, in initialization order. `healthy` is
 *   simply the negation of `circuitOpen`.
 */
export function getProviderHealth(): Array<{ name: string; circuitOpen: boolean; healthy: boolean }> {
  const report: Array<{ name: string; circuitOpen: boolean; healthy: boolean }> = [];

  for (const entry of providers) {
    const circuitOpen = entry.circuit.opened;
    report.push({
      name: entry.config.name,
      circuitOpen,
      healthy: !circuitOpen,
    });
  }

  return report;
}
package/src/types.ts ADDED
@@ -0,0 +1,238 @@
1
+ // types.ts - Stepper Type Definitions
2
+
3
/**
 * Configuration for one generic webhook callback.
 * Stepper sends the raw result to `url`; any transformation is up to the receiver.
 */
export interface WebhookCallback {
  /** URL the result is sent to. */
  url: string;
  /** Custom request headers (auth tokens, content-type, etc.). */
  headers?: { [name: string]: string };
  /** When true, later callbacks are still attempted if this one fails. */
  continueOnFailure?: boolean;
  /** Retry policy for this callback. */
  retry?: {
    /** Total number of delivery attempts. */
    maxAttempts: number;
    /** Base backoff between attempts, in milliseconds. */
    backoffMs: number;
  };
}
20
+
21
/**
 * Everything needed to generate a report for one commit.
 */
export interface PromptInput {
  /** User the report is generated for. */
  userId: string;
  /** SHA of the commit being described. */
  commitSha: string;
  /** Repository the commit belongs to. */
  repo: string;
  /** Commit message. */
  message: string;
  /** Files associated with the commit. */
  files: Array<string>;
  /** Components associated with the commit. */
  components: Array<string>;
  /** Summary of the commit's diff. */
  diffSummary: string;
  /** Optional template selector/content (exact semantics defined by the prompt builder). */
  template?: string;

  /**
   * Webhook callbacks that receive the raw result, called in order.
   * Set `continueOnFailure: true` on a callback so the ones after it are
   * still attempted when it fails.
   */
  callbacks?: Array<WebhookCallback>;
}
41
+
42
/**
 * Structured report returned by an AI provider (or by the template fallback).
 */
export interface ReportOutput {
  /** Report headline. */
  title: string;
  /** Short overview of the commit. */
  summary: string;
  /** Individual changes, one entry each. */
  changes: Array<string>;
  /** Why the change was made. */
  rationale: string;
  /** Impact and testing notes. */
  impact_and_tests: string;
  /** Suggested follow-ups, one entry each. */
  next_steps: Array<string>;
  /** Tags as a single string. */
  tags: string;
}
54
+
55
/**
 * Final outcome of a report generation run.
 */
export interface ProviderResult {
  /** The generated (or fallback) report. */
  result: ReportOutput;
  /** Name of the provider that produced `result`. */
  usedProvider: string;
  /** Log of every provider attempted or skipped during the run. */
  providersAttempted: Array<ProviderAttemptMeta>;
  /** True when `result` came from the fallback rather than a provider. */
  fallback: boolean;
  /** Durations in milliseconds. */
  timings: {
    /** Wall-clock time for the whole run. */
    totalMs: number;
    /** Time spent in the provider call, when one succeeded. */
    providerMs?: number;
  };
}
68
+
69
/**
 * Record of one provider attempt (or skip) within a generation run.
 */
export interface ProviderAttemptMeta {
  /** Provider name. */
  provider: string;
  /** Attempt counter for this provider. */
  attemptNumber: number;
  /** Error message when the attempt failed. */
  error?: string;
  /** Error code/category when the attempt failed. */
  errorCode?: string;
  /** Duration of the provider call in milliseconds. */
  durationMs?: number;
  /** Reason the provider was skipped instead of called. */
  skipped?: string;
}
80
+
81
/**
 * Shape of a cached report entry.
 */
export interface CacheEntry {
  /** Lifecycle state of the entry. */
  status: 'hydrated' | 'dehydrated' | 'failed';
  /** Cached report, when available. */
  result?: ReportOutput;
  /** Job associated with this entry. */
  jobId?: string;
  /** Provider attempts recorded for the job. */
  providersAttempted?: Array<ProviderAttemptMeta>;
  /** Creation and last-update times, as strings. */
  timestamps: {
    created: string;
    updated: string;
  };
  /** Time-to-live for the entry. */
  ttl?: number;
  /** Entity tag for the entry. */
  etag?: string;
  /** Whether `result` is a fallback report. */
  fallback?: boolean;
  /** Error text for failed entries. */
  error?: string;
}
98
+
99
/**
 * Payload stored with each BullMQ report job.
 */
export interface ReportJobData {
  /** Identifier of the job. */
  jobId: string;
  /** Input the report is generated from. */
  input: PromptInput;
  /** Cache key associated with the job. */
  cacheKey: string;
  /** Optional queue priority. */
  priority?: number;
  /** Optional URL associated with the job for callbacks. */
  callbackUrl?: string;
}
109
+
110
/**
 * Categories of provider failure, as stable string codes.
 */
export enum ProviderErrorType {
  /** Provider rejected the call because of rate limiting. */
  RateLimit = "RATE_LIMIT",
  /** Authentication or authorization failed. */
  Auth = "AUTH_ERROR",
  /** The call did not finish in time. */
  Timeout = "TIMEOUT",
  /** Provider could not be reached or is down. */
  Unavailable = "UNAVAILABLE",
  /** Provider answered with something unusable. */
  InvalidResponse = "INVALID_RESPONSE",
  /** Anything that fits none of the above. */
  Unknown = "UNKNOWN",
}
121
+
122
/**
 * Optional lifecycle hooks for stepper events. Every hook may be
 * synchronous or return a promise.
 */
export interface StepperCallbacks {
  /** A job was enqueued. */
  onEnqueue?: (jobId: string, meta: { input: PromptInput; cacheKey: string }) => void | Promise<void>;

  /** Report generation for a job is starting. */
  onStart?: (jobId: string, input: PromptInput) => void | Promise<void>;

  /** A provider is about to be called. */
  onProviderAttempt?: (
    jobId: string,
    providerName: string,
    attemptNumber: number,
    meta: ProviderAttemptMeta
  ) => void | Promise<void>;

  /** A provider produced a report. */
  onSuccess?: (
    jobId: string,
    providerName: string,
    result: ReportOutput,
    meta: { timings: { totalMs: number; providerMs?: number } }
  ) => void | Promise<void>;

  /** The fallback report was used. */
  onFallback?: (
    jobId: string,
    result: ReportOutput,
    meta: { providersAttempted: ProviderAttemptMeta[] }
  ) => void | Promise<void>;

  /** The job failed. */
  onFailure?: (
    jobId: string,
    errors: ProviderAttemptMeta[],
    meta: { lastError?: string }
  ) => void | Promise<void>;
}
151
+
152
/**
 * Settings for a single AI provider.
 */
export interface ProviderConfig {
  /** Provider name, used in logs and results. */
  name: string;
  /** Whether the provider takes part in orchestration. */
  enabled: boolean;
  /** Base URL of the provider's API. */
  baseUrl?: string;
  /** Model to request from the provider. */
  modelName?: string;
  /** API key value. */
  apiKey?: string;
  /** Environment variable related to the API key (presumably its name). */
  apiKeyEnvVar?: string;
  /** Rate limit in Requests Per Minute. */
  rateLimitRPM?: number;
  /** Rate limit in Requests Per Second. */
  rateLimitRPS?: number;
  /** Maximum number of simultaneous requests. */
  concurrency: number;
  /** Per-call timeout. */
  timeout?: number;
}
167
+
168
/**
 * Top-level stepper configuration, grouped by subsystem.
 */
export interface StepperConfig {
  /** Provider definitions. */
  providers: Array<ProviderConfig>;
  /** Optional additional provider definitions. */
  providerConfigs?: Array<ProviderConfig>;

  /** Template fallback used when every provider fails. */
  fallback: {
    enabled: boolean;
  };

  /** Redis connection. */
  redis: {
    url: string;
    keyPrefix: string;
  };

  /** Report cache behaviour. */
  cache: {
    ttlSeconds: number;
    staleThresholdSeconds: number;
    enableStaleWhileRevalidate: boolean;
  };

  /** Job queue. */
  queue: {
    name: string;
    concurrency: number;
  };

  /** Outgoing webhook delivery. */
  webhook: {
    enabled: boolean;
    secret: string;
    maxRetries: number;
    retryDelayMs: number;
  };

  /** Per-provider retry policy. */
  retry: {
    /** Attempts made against one provider before giving up on it. */
    maxAttemptsPerProvider: number;
    /** Base delay for exponential backoff, in milliseconds. */
    baseDelayMs: number;
    /** Upper bound (exclusive) of the random jitter, in milliseconds. */
    maxJitterMs: number;
    /** Wait after a rate limit when the provider gives no retry hint, in seconds. */
    rateLimitFallbackSeconds: number;
  };

  /** Circuit breaker tuning. */
  circuit: {
    failureThreshold: number;
    windowSeconds: number;
    cooldownSeconds: number;
  };

  /** Security-related settings. */
  security: {
    redactBeforeSend: boolean;

    /** CORS configuration. */
    cors: {
      enabled: boolean;
      allowedOrigins: Array<string>;
      allowCredentials: boolean;
    };

    /** Rate limiting configuration. */
    rateLimit: {
      enabled: boolean;
      /** Time window in milliseconds. */
      windowMs: number;
      /** Max requests per window per IP. */
      maxRequests: number;
      /** Max requests per window per userId. */
      maxRequestsPerUser: number;
      /** Skip rate limiting for /health and /metrics. */
      skipHealthEndpoints: boolean;
    };

    /** Helmet security headers. */
    helmet: {
      enabled: boolean;
    };

    /** API Key authentication. */
    apiKey: {
      enabled: boolean;
      /** Header carrying the key, e.g. 'x-api-key'. */
      headerName: string;
      /** Skip auth for /health and /metrics. */
      skipHealthEndpoints: boolean;
    };
  };

  /** HTTP server ports. */
  server: {
    port: number;
    metricsPort?: number;
  };
}